From e9734653c523c744f03333ece6ae7a315187f05c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2026 16:26:13 +0100 Subject: [PATCH 01/18] fs,fsverity: reject size changes on fsverity files in setattr_prepare Add the check to reject truncates of fsverity files directly to setattr_prepare instead of requiring the file system to handle it. Besides removing boilerplate code, this also fixes the complete lack of such check in btrfs. Fixes: 146054090b08 ("btrfs: initial fsverity support") Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: "Darrick J. Wong" Link: https://lore.kernel.org/r/20260128152630.627409-2-hch@lst.de Signed-off-by: Eric Biggers --- fs/attr.c | 12 +++++++++++- fs/ext4/inode.c | 4 ---- fs/f2fs/file.c | 4 ---- fs/verity/open.c | 8 -------- include/linux/fsverity.h | 25 ------------------------- 5 files changed, 11 insertions(+), 42 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index b9ec6b47bab2..e7d7c6d19fe9 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -169,7 +169,17 @@ int setattr_prepare(struct mnt_idmap *idmap, struct dentry *dentry, * ATTR_FORCE. */ if (ia_valid & ATTR_SIZE) { - int error = inode_newsize_ok(inode, attr->ia_size); + int error; + + /* + * Verity files are immutable, so deny truncates. This isn't + * covered by the open-time check because sys_truncate() takes a + * path, not an open file. 
+ */ + if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) + return -EPERM; + + error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0c466ccbed69..8c2ef98fa530 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5835,10 +5835,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (error) return error; - error = fsverity_prepare_setattr(dentry, attr); - if (error) - return error; - if (is_quota_modification(idmap, inode, attr)) { error = dquot_initialize(inode); if (error) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d7047ca6b98d..da029fed4e5a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1074,10 +1074,6 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = fsverity_prepare_setattr(dentry, attr); - if (err) - return err; - if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; diff --git a/fs/verity/open.c b/fs/verity/open.c index 77b1c977af02..2aa5eae5a540 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -384,14 +384,6 @@ int __fsverity_file_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL_GPL(__fsverity_file_open); -int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr) -{ - if (attr->ia_valid & ATTR_SIZE) - return -EPERM; - return 0; -} -EXPORT_SYMBOL_GPL(__fsverity_prepare_setattr); - void __fsverity_cleanup_inode(struct inode *inode) { struct fsverity_info **vi_addr = fsverity_info_addr(inode); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 5bc7280425a7..86fb1708676b 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -179,7 +179,6 @@ int fsverity_get_digest(struct inode *inode, /* open.c */ int __fsverity_file_open(struct inode *inode, struct file *filp); -int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); void __fsverity_cleanup_inode(struct inode *inode); /** @@ -251,12 +250,6 @@ static inline int 
__fsverity_file_open(struct inode *inode, struct file *filp) return -EOPNOTSUPP; } -static inline int __fsverity_prepare_setattr(struct dentry *dentry, - struct iattr *attr) -{ - return -EOPNOTSUPP; -} - static inline void fsverity_cleanup_inode(struct inode *inode) { } @@ -338,22 +331,4 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) return 0; } -/** - * fsverity_prepare_setattr() - prepare to change a verity inode's attributes - * @dentry: dentry through which the inode is being changed - * @attr: attributes to change - * - * Verity files are immutable, so deny truncates. This isn't covered by the - * open-time check because sys_truncate() takes a path, not a file descriptor. - * - * Return: 0 on success, -errno on failure - */ -static inline int fsverity_prepare_setattr(struct dentry *dentry, - struct iattr *attr) -{ - if (IS_VERITY(d_inode(dentry))) - return __fsverity_prepare_setattr(dentry, attr); - return 0; -} - #endif /* _LINUX_FSVERITY_H */ From 70098d932714e06894da3e46a0b8e7abbea9a961 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2026 16:26:14 +0100 Subject: [PATCH 02/18] fs,fsverity: clear out fsverity_info from common code Free the fsverity_info directly in clear_inode instead of requiring file systems to handle it. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: "Darrick J. 
Wong" Acked-by: David Sterba # btrfs Link: https://lore.kernel.org/r/20260128152630.627409-3-hch@lst.de Signed-off-by: Eric Biggers --- fs/btrfs/inode.c | 10 +++------- fs/ext4/super.c | 1 - fs/f2fs/inode.c | 1 - fs/inode.c | 9 +++++++++ fs/verity/open.c | 3 +-- include/linux/fsverity.h | 26 ++------------------------ 6 files changed, 15 insertions(+), 35 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2b5b440637e..67c64efc5099 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -34,7 +34,6 @@ #include #include #include -#include #include "misc.h" #include "ctree.h" #include "disk-io.h" @@ -5571,11 +5570,8 @@ void btrfs_evict_inode(struct inode *inode) trace_btrfs_inode_evict(inode); - if (!root) { - fsverity_cleanup_inode(inode); - clear_inode(inode); - return; - } + if (!root) + goto clear_inode; fs_info = inode_to_fs_info(inode); evict_inode_truncate_pages(inode); @@ -5675,7 +5671,7 @@ void btrfs_evict_inode(struct inode *inode) * to retry these periodically in the future. 
*/ btrfs_remove_delayed_node(BTRFS_I(inode)); - fsverity_cleanup_inode(inode); +clear_inode: clear_inode(inode); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 87205660c5d0..86131f4d8718 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1527,7 +1527,6 @@ void ext4_clear_inode(struct inode *inode) EXT4_I(inode)->jinode = NULL; } fscrypt_put_encryption_info(inode); - fsverity_cleanup_inode(inode); } static struct inode *ext4_nfs_get_inode(struct super_block *sb, diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 38b8994bc1b2..ee332b994348 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -1000,7 +1000,6 @@ void f2fs_evict_inode(struct inode *inode) } out_clear: fscrypt_put_encryption_info(inode); - fsverity_cleanup_inode(inode); clear_inode(inode); } diff --git a/fs/inode.c b/fs/inode.c index 379f4c19845c..38dbdfbb09ba 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include /* for inode_has_buffers */ @@ -773,6 +774,14 @@ void dump_mapping(const struct address_space *mapping) void clear_inode(struct inode *inode) { + /* + * Only IS_VERITY() inodes can have verity info, so start by checking + * for IS_VERITY() (which is faster than retrieving the pointer to the + * verity info). This minimizes overhead for non-verity inodes. 
+ */ + if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) + fsverity_cleanup_inode(inode); + /* * We have to cycle the i_pages lock here because reclaim can be in the * process of removing the last page (in __filemap_remove_folio()) diff --git a/fs/verity/open.c b/fs/verity/open.c index 2aa5eae5a540..090cb77326ee 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -384,14 +384,13 @@ int __fsverity_file_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL_GPL(__fsverity_file_open); -void __fsverity_cleanup_inode(struct inode *inode) +void fsverity_cleanup_inode(struct inode *inode) { struct fsverity_info **vi_addr = fsverity_info_addr(inode); fsverity_free_info(*vi_addr); *vi_addr = NULL; } -EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode); void __init fsverity_init_info_cache(void) { diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 86fb1708676b..ea1ed2e6c2f9 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -179,26 +179,6 @@ int fsverity_get_digest(struct inode *inode, /* open.c */ int __fsverity_file_open(struct inode *inode, struct file *filp); -void __fsverity_cleanup_inode(struct inode *inode); - -/** - * fsverity_cleanup_inode() - free the inode's verity info, if present - * @inode: an inode being evicted - * - * Filesystems must call this on inode eviction to free the inode's verity info. - */ -static inline void fsverity_cleanup_inode(struct inode *inode) -{ - /* - * Only IS_VERITY() inodes can have verity info, so start by checking - * for IS_VERITY() (which is faster than retrieving the pointer to the - * verity info). This minimizes overhead for non-verity inodes. 
- */ - if (IS_VERITY(inode)) - __fsverity_cleanup_inode(inode); - else - VFS_WARN_ON_ONCE(*fsverity_info_addr(inode) != NULL); -} /* read_metadata.c */ @@ -250,10 +230,6 @@ static inline int __fsverity_file_open(struct inode *inode, struct file *filp) return -EOPNOTSUPP; } -static inline void fsverity_cleanup_inode(struct inode *inode) -{ -} - /* read_metadata.c */ static inline int fsverity_ioctl_read_metadata(struct file *filp, @@ -331,4 +307,6 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) return 0; } +void fsverity_cleanup_inode(struct inode *inode); + #endif /* _LINUX_FSVERITY_H */ From fb26616459098323eeda67b4f30786743191171b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2026 16:26:15 +0100 Subject: [PATCH 03/18] ext4: don't build the fsverity work handler for !CONFIG_FS_VERITY Use IS_ENABLED to disable this code, leading to a slight size reduction: text data bss dec hex filename 4121 376 16 4513 11a1 fs/ext4/readpage.o.old 4030 328 16 4374 1116 fs/ext4/readpage.o Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: "Darrick J. 
Wong" Link: https://lore.kernel.org/r/20260128152630.627409-4-hch@lst.de Signed-off-by: Eric Biggers --- fs/ext4/readpage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index e7f2350c725b..267594ef0b2c 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -130,7 +130,8 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) ctx->cur_step++; fallthrough; case STEP_VERITY: - if (ctx->enabled_steps & (1 << STEP_VERITY)) { + if (IS_ENABLED(CONFIG_FS_VERITY) && + ctx->enabled_steps & (1 << STEP_VERITY)) { INIT_WORK(&ctx->work, verity_work); fsverity_enqueue_verify_work(&ctx->work); return; From 6f9fae2f738c41c22ee90778ff8fc069de379472 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2026 16:26:16 +0100 Subject: [PATCH 04/18] f2fs: don't build the fsverity work handler for !CONFIG_FS_VERITY Use IS_ENABLED to disable this code, leading to a slight size reduction: text data bss dec hex filename 25709 2412 24 28145 6df1 fs/f2fs/compress.o.old 25198 2252 24 27474 6b52 fs/f2fs/compress.o Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20260128152630.627409-5-hch@lst.de Signed-off-by: Eric Biggers --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7b68bf22989d..40a62f1dee4d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1833,7 +1833,7 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, { int i; - if (!failed && dic->need_verity) { + if (IS_ENABLED(CONFIG_FS_VERITY) && !failed && dic->need_verity) { /* * Note that to avoid deadlocks, the verity work can't be done * on the decompression workqueue. 
This is because verifying From ac09a30900d81ac10606f650e3c720cfafa37be0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2026 16:26:17 +0100 Subject: [PATCH 05/18] fsverity: pass struct file to ->write_merkle_tree_block This will make an iomap implementation of the method easier. Signed-off-by: Christoph Hellwig Reviewed-by: Andrey Albershteyn Reviewed-by: "Darrick J. Wong" Acked-by: David Sterba # btrfs Link: https://lore.kernel.org/r/20260128152630.627409-6-hch@lst.de Signed-off-by: Eric Biggers --- fs/btrfs/verity.c | 5 +++-- fs/ext4/verity.c | 6 +++--- fs/f2fs/verity.c | 6 +++--- fs/verity/enable.c | 9 +++++---- include/linux/fsverity.h | 6 +++--- 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index a2ac3fb68bc8..e7643c22a6bf 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -774,16 +774,17 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, /* * fsverity op that writes a Merkle tree block into the btree. 
* - * @inode: inode to write a Merkle tree block for + * @file: file to write a Merkle tree block for * @buf: Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) * * Returns 0 on success or negative error code on failure */ -static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf, +static int btrfs_write_merkle_tree_block(struct file *file, const void *buf, u64 pos, unsigned int size) { + struct inode *inode = file_inode(file); loff_t merkle_pos = merkle_file_pos(inode); if (merkle_pos < 0) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 415d9c4d8a32..2ce4cf8a1e31 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -380,12 +380,12 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, return folio_file_page(folio, index); } -static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, +static int ext4_write_merkle_tree_block(struct file *file, const void *buf, u64 pos, unsigned int size) { - pos += ext4_verity_metadata_pos(inode); + pos += ext4_verity_metadata_pos(file_inode(file)); - return pagecache_write(inode, buf, size, pos); + return pagecache_write(file_inode(file), buf, size, pos); } const struct fsverity_operations ext4_verityops = { diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 05b935b55216..c1c4d8044681 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -278,12 +278,12 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, return folio_file_page(folio, index); } -static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, +static int f2fs_write_merkle_tree_block(struct file *file, const void *buf, u64 pos, unsigned int size) { - pos += f2fs_verity_metadata_pos(inode); + pos += f2fs_verity_metadata_pos(file_inode(file)); - return pagecache_write(inode, buf, size, pos); + return pagecache_write(file_inode(file), buf, size, pos); } const struct 
fsverity_operations f2fs_verityops = { diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 95ec42b84797..c56c18e2605b 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -41,14 +41,15 @@ static int hash_one_block(const struct merkle_tree_params *params, return 0; } -static int write_merkle_tree_block(struct inode *inode, const u8 *buf, +static int write_merkle_tree_block(struct file *file, const u8 *buf, unsigned long index, const struct merkle_tree_params *params) { + struct inode *inode = file_inode(file); u64 pos = (u64)index << params->log_blocksize; int err; - err = inode->i_sb->s_vop->write_merkle_tree_block(inode, buf, pos, + err = inode->i_sb->s_vop->write_merkle_tree_block(file, buf, pos, params->block_size); if (err) fsverity_err(inode, "Error %d writing Merkle tree block %lu", @@ -135,7 +136,7 @@ static int build_merkle_tree(struct file *filp, err = hash_one_block(params, &buffers[level]); if (err) goto out; - err = write_merkle_tree_block(inode, + err = write_merkle_tree_block(filp, buffers[level].data, level_offset[level], params); @@ -155,7 +156,7 @@ static int build_merkle_tree(struct file *filp, err = hash_one_block(params, &buffers[level]); if (err) goto out; - err = write_merkle_tree_block(inode, + err = write_merkle_tree_block(filp, buffers[level].data, level_offset[level], params); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index ea1ed2e6c2f9..ac5c98fd4602 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -114,9 +114,9 @@ struct fsverity_operations { unsigned long num_ra_pages); /** - * Write a Merkle tree block to the given inode. + * Write a Merkle tree block to the given file. 
* - * @inode: the inode for which the Merkle tree is being built + * @file: the file for which the Merkle tree is being built * @buf: the Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) @@ -126,7 +126,7 @@ struct fsverity_operations { * * Return: 0 on success, -errno on failure */ - int (*write_merkle_tree_block)(struct inode *inode, const void *buf, + int (*write_merkle_tree_block)(struct file *file, const void *buf, u64 pos, unsigned int size); }; From 821ddd25fbe88ea60e9c35cfb76c2ddeb1ffae26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2026 16:26:18 +0100 Subject: [PATCH 06/18] fsverity: start consolidating pagecache code ext4 and f2fs are largely using the same code to read a page full of Merkle tree blocks from the page cache, and the upcoming xfs fsverity support would add another copy. Move the ext4 code to fs/verity/ and use it in f2fs as well. For f2fs this removes the previous f2fs-specific error injection, but otherwise the behavior remains unchanged. Signed-off-by: Christoph Hellwig Reviewed-by: Andrey Albershteyn Reviewed-by: Jan Kara Reviewed-by: "Darrick J. 
Wong" Link: https://lore.kernel.org/r/20260128152630.627409-7-hch@lst.de Signed-off-by: Eric Biggers --- fs/ext4/verity.c | 17 +---------------- fs/f2fs/verity.c | 17 +---------------- fs/verity/Makefile | 1 + fs/verity/pagecache.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/fsverity.h | 3 +++ 5 files changed, 44 insertions(+), 32 deletions(-) create mode 100644 fs/verity/pagecache.c diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 2ce4cf8a1e31..a071860ad36a 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -361,23 +361,8 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct folio *folio; - index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; - - folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); - if (IS_ERR(folio) || !folio_test_uptodate(folio)) { - DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - - if (!IS_ERR(folio)) - folio_put(folio); - else if (num_ra_pages > 1) - page_cache_ra_unbounded(&ractl, num_ra_pages, 0); - folio = read_mapping_folio(inode->i_mapping, index, NULL); - if (IS_ERR(folio)) - return ERR_CAST(folio); - } - return folio_file_page(folio, index); + return generic_read_merkle_tree_page(inode, index, num_ra_pages); } static int ext4_write_merkle_tree_block(struct file *file, const void *buf, diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index c1c4d8044681..d37e584423af 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -259,23 +259,8 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct folio *folio; - index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - - folio = f2fs_filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); - if (IS_ERR(folio) || !folio_test_uptodate(folio)) { - DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - - if (!IS_ERR(folio)) - folio_put(folio); - else if (num_ra_pages > 1) - 
page_cache_ra_unbounded(&ractl, num_ra_pages, 0); - folio = read_mapping_folio(inode->i_mapping, index, NULL); - if (IS_ERR(folio)) - return ERR_CAST(folio); - } - return folio_file_page(folio, index); + return generic_read_merkle_tree_page(inode, index, num_ra_pages); } static int f2fs_write_merkle_tree_block(struct file *file, const void *buf, diff --git a/fs/verity/Makefile b/fs/verity/Makefile index 435559a4fa9e..ddb4a88a0d60 100644 --- a/fs/verity/Makefile +++ b/fs/verity/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_FS_VERITY) += enable.o \ init.o \ measure.o \ open.o \ + pagecache.o \ read_metadata.o \ verify.o diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c new file mode 100644 index 000000000000..01c652bc802f --- /dev/null +++ b/fs/verity/pagecache.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +#include +#include + +/** + * generic_read_merkle_tree_page - generic ->read_merkle_tree_page helper + * @inode: inode containing the Merkle tree + * @index: 0-based index of the Merkle tree page in the inode + * @num_ra_pages: The number of Merkle tree pages that should be prefetched. + * + * The caller needs to adjust @index from the Merkle-tree relative index passed + * to ->read_merkle_tree_page to the actual index where the Merkle tree is + * stored in the page cache for @inode. 
+ */ +struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, + unsigned long num_ra_pages) +{ + struct folio *folio; + + folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) { + DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); + + if (!IS_ERR(folio)) + folio_put(folio); + else if (num_ra_pages > 1) + page_cache_ra_unbounded(&ractl, num_ra_pages, 0); + folio = read_mapping_folio(inode->i_mapping, index, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); + } + return folio_file_page(folio, index); +} +EXPORT_SYMBOL_GPL(generic_read_merkle_tree_page); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index ac5c98fd4602..8ddaa87fece3 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -309,4 +309,7 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) void fsverity_cleanup_inode(struct inode *inode); +struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, + unsigned long num_ra_pages); + #endif /* _LINUX_FSVERITY_H */ From 23eec9fd64b2889286f31340371d67babfe54155 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:30 +0100 Subject: [PATCH 07/18] fsverity: don't issue readahead for non-ENOENT errors from __filemap_get_folio Issuing more reads on errors is not a good idea, especially when the most common error here is -ENOMEM. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20260202060754.270269-2-hch@lst.de Signed-off-by: Eric Biggers --- fs/verity/pagecache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index 01c652bc802f..1a88decace53 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -22,7 +22,8 @@ struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, struct folio *folio; folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); - if (IS_ERR(folio) || !folio_test_uptodate(folio)) { + if (folio == ERR_PTR(-ENOENT) || + (!IS_ERR(folio) && !folio_test_uptodate(folio))) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); if (!IS_ERR(folio)) @@ -30,9 +31,9 @@ struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, else if (num_ra_pages > 1) page_cache_ra_unbounded(&ractl, num_ra_pages, 0); folio = read_mapping_folio(inode->i_mapping, index, NULL); - if (IS_ERR(folio)) - return ERR_CAST(folio); } + if (IS_ERR(folio)) + return ERR_CAST(folio); return folio_file_page(folio, index); } EXPORT_SYMBOL_GPL(generic_read_merkle_tree_page); From b244c89a70b306e5a91852e00fd8cdd289714505 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:31 +0100 Subject: [PATCH 08/18] readahead: push invalidate_lock out of page_cache_ra_unbounded Require the invalidate_lock to be held over calls to page_cache_ra_unbounded instead of acquiring it in this function. This prepares for calling page_cache_ra_unbounded from ->readahead for fsverity read-ahead. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20260202060754.270269-3-hch@lst.de Signed-off-by: Eric Biggers --- fs/f2fs/file.c | 2 ++ fs/verity/pagecache.c | 7 +++++-- mm/readahead.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index da029fed4e5a..c9b9fcdd0cae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4418,7 +4418,9 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) pgoff_t redirty_idx = page_idx; int page_len = 0, ret = 0; + filemap_invalidate_lock_shared(mapping); page_cache_ra_unbounded(&ractl, len, 0); + filemap_invalidate_unlock_shared(mapping); do { folio = read_cache_folio(mapping, page_idx, NULL, NULL); diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index 1a88decace53..8e0d6fde802f 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -26,10 +26,13 @@ struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, (!IS_ERR(folio) && !folio_test_uptodate(folio))) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - if (!IS_ERR(folio)) + if (!IS_ERR(folio)) { folio_put(folio); - else if (num_ra_pages > 1) + } else if (num_ra_pages > 1) { + filemap_invalidate_lock_shared(inode->i_mapping); page_cache_ra_unbounded(&ractl, num_ra_pages, 0); + filemap_invalidate_unlock_shared(inode->i_mapping); + } folio = read_mapping_folio(inode->i_mapping, index, NULL); } if (IS_ERR(folio)) diff --git a/mm/readahead.c b/mm/readahead.c index b415c9969176..f43d03558e62 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -204,8 +204,9 @@ static struct folio *ractl_alloc_folio(struct readahead_control *ractl, * not the function you want to call. Use page_cache_async_readahead() * or page_cache_sync_readahead() instead. * - * Context: File is referenced by caller. Mutexes may be held by caller. - * May sleep, but will not reenter filesystem to reclaim memory. 
+ * Context: File is referenced by caller, and ractl->mapping->invalidate_lock + * must be held by the caller at least in shared mode. Mutexes may be held by + * caller. May sleep, but will not reenter filesystem to reclaim memory. */ void page_cache_ra_unbounded(struct readahead_control *ractl, unsigned long nr_to_read, unsigned long lookahead_size) @@ -228,9 +229,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, */ unsigned int nofs = memalloc_nofs_save(); + lockdep_assert_held(&mapping->invalidate_lock); + trace_page_cache_ra_unbounded(mapping->host, index, nr_to_read, lookahead_size); - filemap_invalidate_lock_shared(mapping); index = mapping_align_index(mapping, index); /* @@ -300,7 +302,6 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, * will then handle the error. */ read_pages(ractl); - filemap_invalidate_unlock_shared(mapping); memalloc_nofs_restore(nofs); } EXPORT_SYMBOL_GPL(page_cache_ra_unbounded); @@ -314,9 +315,9 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded); static void do_page_cache_ra(struct readahead_control *ractl, unsigned long nr_to_read, unsigned long lookahead_size) { - struct inode *inode = ractl->mapping->host; + struct address_space *mapping = ractl->mapping; unsigned long index = readahead_index(ractl); - loff_t isize = i_size_read(inode); + loff_t isize = i_size_read(mapping->host); pgoff_t end_index; /* The last page we want to read */ if (isize == 0) @@ -329,7 +330,9 @@ static void do_page_cache_ra(struct readahead_control *ractl, if (nr_to_read > end_index - index) nr_to_read = end_index - index + 1; + filemap_invalidate_lock_shared(mapping); page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size); + filemap_invalidate_unlock_shared(mapping); } /* From 314b652b7e7ad335fa20b693c8878a4850dae098 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:32 +0100 Subject: [PATCH 09/18] ext4: move ->read_folio and ->readahead to readpage.c Keep all the read into pagecache code in a 
single file. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: Theodore Ts'o Link: https://lore.kernel.org/r/20260202060754.270269-4-hch@lst.de Signed-off-by: Eric Biggers --- fs/ext4/ext4.h | 4 ++-- fs/ext4/inode.c | 27 --------------------------- fs/ext4/readpage.c | 30 +++++++++++++++++++++++++++++- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 56112f201cac..a8a448e20ef8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3735,8 +3735,8 @@ static inline void ext4_set_de_type(struct super_block *sb, } /* readpages.c */ -extern int ext4_mpage_readpages(struct inode *inode, - struct readahead_control *rac, struct folio *folio); +int ext4_read_folio(struct file *file, struct folio *folio); +void ext4_readahead(struct readahead_control *rac); extern int __init ext4_init_post_read_processing(void); extern void ext4_exit_post_read_processing(void); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8c2ef98fa530..e98954e7d0b3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3380,33 +3380,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) return ret; } -static int ext4_read_folio(struct file *file, struct folio *folio) -{ - int ret = -EAGAIN; - struct inode *inode = folio->mapping->host; - - trace_ext4_read_folio(inode, folio); - - if (ext4_has_inline_data(inode)) - ret = ext4_readpage_inline(inode, folio); - - if (ret == -EAGAIN) - return ext4_mpage_readpages(inode, NULL, folio); - - return ret; -} - -static void ext4_readahead(struct readahead_control *rac) -{ - struct inode *inode = rac->mapping->host; - - /* If the file has inline data, no need to do readahead. 
*/ - if (ext4_has_inline_data(inode)) - return; - - ext4_mpage_readpages(inode, rac, NULL); -} - static void ext4_invalidate_folio(struct folio *folio, size_t offset, size_t length) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 267594ef0b2c..5a7774f089e8 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -45,6 +45,7 @@ #include #include "ext4.h" +#include #define NUM_PREALLOC_POST_READ_CTXS 128 @@ -209,7 +210,7 @@ static inline loff_t ext4_readpage_limit(struct inode *inode) return i_size_read(inode); } -int ext4_mpage_readpages(struct inode *inode, +static int ext4_mpage_readpages(struct inode *inode, struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; @@ -394,6 +395,33 @@ int ext4_mpage_readpages(struct inode *inode, return 0; } +int ext4_read_folio(struct file *file, struct folio *folio) +{ + int ret = -EAGAIN; + struct inode *inode = folio->mapping->host; + + trace_ext4_read_folio(inode, folio); + + if (ext4_has_inline_data(inode)) + ret = ext4_readpage_inline(inode, folio); + + if (ret == -EAGAIN) + return ext4_mpage_readpages(inode, NULL, folio); + + return ret; +} + +void ext4_readahead(struct readahead_control *rac) +{ + struct inode *inode = rac->mapping->host; + + /* If the file has inline data, no need to do readahead. */ + if (ext4_has_inline_data(inode)) + return; + + ext4_mpage_readpages(inode, rac, NULL); +} + int __init ext4_init_post_read_processing(void) { bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT); From f1a6cf44b344b1ac2cefb387779e3002be237a7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:33 +0100 Subject: [PATCH 10/18] fsverity: kick off hash readahead at data I/O submission time Currently all reads of the fsverity hashes are kicked off from the data I/O completion handler, leading to needlessly dependent I/O. This is worked around a bit by performing readahead on the level 0 nodes, but still fairly ineffective. 
Switch to a model where the ->read_folio and ->readahead methods instead kick off explicit readahead of the fsverity hashed so they are usually available at I/O completion time. For 64k sequential reads on my test VM this improves read performance from 2.4GB/s - 2.6GB/s to 3.5GB/s - 3.9GB/s. The improvements for random reads are likely to be even bigger. Signed-off-by: Christoph Hellwig Acked-by: David Sterba # btrfs Link: https://lore.kernel.org/r/20260202060754.270269-5-hch@lst.de Signed-off-by: Eric Biggers --- fs/btrfs/verity.c | 4 +-- fs/ext4/readpage.c | 17 +++++---- fs/ext4/verity.c | 13 +++++-- fs/f2fs/data.c | 17 ++++++--- fs/f2fs/verity.c | 13 +++++-- fs/verity/pagecache.c | 46 ++++++++++++++++-------- fs/verity/read_metadata.c | 19 +++++++--- fs/verity/verify.c | 76 +++++++++++++++++++++++++-------------- include/linux/fsverity.h | 30 +++++++++++----- 9 files changed, 161 insertions(+), 74 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index e7643c22a6bf..c152bef71e8b 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -697,7 +697,6 @@ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size) * * @inode: inode to read a merkle tree page for * @index: page index relative to the start of the merkle tree - * @num_ra_pages: number of pages to readahead. Optional, we ignore it * * The Merkle tree is stored in the filesystem btree, but its pages are cached * with a logical position past EOF in the inode's mapping. @@ -705,8 +704,7 @@ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size) * Returns the page we read, or an ERR_PTR on error. 
*/ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, - pgoff_t index, - unsigned long num_ra_pages) + pgoff_t index) { struct folio *folio; u64 off = (u64)index << PAGE_SHIFT; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 5a7774f089e8..3049a66a05c8 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -397,18 +397,20 @@ static int ext4_mpage_readpages(struct inode *inode, int ext4_read_folio(struct file *file, struct folio *folio) { - int ret = -EAGAIN; struct inode *inode = folio->mapping->host; + int ret; trace_ext4_read_folio(inode, folio); - if (ext4_has_inline_data(inode)) + if (ext4_has_inline_data(inode)) { ret = ext4_readpage_inline(inode, folio); + if (ret != -EAGAIN) + return ret; + } - if (ret == -EAGAIN) - return ext4_mpage_readpages(inode, NULL, folio); - - return ret; + if (ext4_need_verity(inode, folio->index)) + fsverity_readahead(inode, folio->index, folio_nr_pages(folio)); + return ext4_mpage_readpages(inode, NULL, folio); } void ext4_readahead(struct readahead_control *rac) @@ -419,6 +421,9 @@ void ext4_readahead(struct readahead_control *rac) if (ext4_has_inline_data(inode)) return; + if (ext4_need_verity(inode, readahead_index(rac))) + fsverity_readahead(inode, readahead_index(rac), + readahead_count(rac)); ext4_mpage_readpages(inode, rac, NULL); } diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index a071860ad36a..552cc5d81d94 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -358,11 +358,17 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, } static struct page *ext4_read_merkle_tree_page(struct inode *inode, - pgoff_t index, - unsigned long num_ra_pages) + pgoff_t index) { index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; - return generic_read_merkle_tree_page(inode, index, num_ra_pages); + return generic_read_merkle_tree_page(inode, index); +} + +static void ext4_readahead_merkle_tree(struct inode *inode, pgoff_t index, + unsigned long nr_pages) +{ + index += 
ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; + generic_readahead_merkle_tree(inode, index, nr_pages); } static int ext4_write_merkle_tree_block(struct file *file, const void *buf, @@ -380,5 +386,6 @@ const struct fsverity_operations ext4_verityops = { .end_enable_verity = ext4_end_enable_verity, .get_verity_descriptor = ext4_get_verity_descriptor, .read_merkle_tree_page = ext4_read_merkle_tree_page, + .readahead_merkle_tree = ext4_readahead_merkle_tree, .write_merkle_tree_block = ext4_write_merkle_tree_block, }; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c30e69392a62..58d8a311ef2c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2458,7 +2458,7 @@ static int f2fs_mpage_readpages(struct inode *inode, static int f2fs_read_data_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; - int ret = -EAGAIN; + int ret; trace_f2fs_readpage(folio, DATA); @@ -2468,11 +2468,15 @@ static int f2fs_read_data_folio(struct file *file, struct folio *folio) } /* If the file has inline data, try to read it directly */ - if (f2fs_has_inline_data(inode)) + if (f2fs_has_inline_data(inode)) { ret = f2fs_read_inline_data(inode, folio); - if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(inode, NULL, folio); - return ret; + if (ret != -EAGAIN) + return ret; + } + + if (f2fs_need_verity(inode, folio->index)) + fsverity_readahead(inode, folio->index, folio_nr_pages(folio)); + return f2fs_mpage_readpages(inode, NULL, folio); } static void f2fs_readahead(struct readahead_control *rac) @@ -2488,6 +2492,9 @@ static void f2fs_readahead(struct readahead_control *rac) if (f2fs_has_inline_data(inode)) return; + if (f2fs_need_verity(inode, readahead_index(rac))) + fsverity_readahead(inode, readahead_index(rac), + readahead_count(rac)); f2fs_mpage_readpages(inode, rac, NULL); } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index d37e584423af..de2c87621319 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -256,11 +256,17 @@ static int 
f2fs_get_verity_descriptor(struct inode *inode, void *buf, } static struct page *f2fs_read_merkle_tree_page(struct inode *inode, - pgoff_t index, - unsigned long num_ra_pages) + pgoff_t index) { index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - return generic_read_merkle_tree_page(inode, index, num_ra_pages); + return generic_read_merkle_tree_page(inode, index); +} + +static void f2fs_readahead_merkle_tree(struct inode *inode, pgoff_t index, + unsigned long nr_pages) +{ + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; + generic_readahead_merkle_tree(inode, index, nr_pages); } static int f2fs_write_merkle_tree_block(struct file *file, const void *buf, @@ -278,5 +284,6 @@ const struct fsverity_operations f2fs_verityops = { .end_enable_verity = f2fs_end_enable_verity, .get_verity_descriptor = f2fs_get_verity_descriptor, .read_merkle_tree_page = f2fs_read_merkle_tree_page, + .readahead_merkle_tree = f2fs_readahead_merkle_tree, .write_merkle_tree_block = f2fs_write_merkle_tree_block, }; diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index 8e0d6fde802f..1819314ecaa3 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -3,6 +3,7 @@ * Copyright 2019 Google LLC */ +#include #include #include @@ -10,33 +11,48 @@ * generic_read_merkle_tree_page - generic ->read_merkle_tree_page helper * @inode: inode containing the Merkle tree * @index: 0-based index of the Merkle tree page in the inode - * @num_ra_pages: The number of Merkle tree pages that should be prefetched. * * The caller needs to adjust @index from the Merkle-tree relative index passed * to ->read_merkle_tree_page to the actual index where the Merkle tree is * stored in the page cache for @inode. 
*/ -struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, - unsigned long num_ra_pages) +struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index) { struct folio *folio; + folio = read_mapping_folio(inode->i_mapping, index, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); + return folio_file_page(folio, index); +} +EXPORT_SYMBOL_GPL(generic_read_merkle_tree_page); + +/** + * generic_readahead_merkle_tree() - generic ->readahead_merkle_tree helper + * @inode: inode containing the Merkle tree + * @index: 0-based index of the first Merkle tree page to read ahead in the + * inode + * @nr_pages: the number of Merkle tree pages that should be read ahead + * + * The caller needs to adjust @index from the Merkle-tree relative index passed + * to ->read_merkle_tree_page to the actual index where the Merkle tree is + * stored in the page cache for @inode. + */ +void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index, + unsigned long nr_pages) +{ + struct folio *folio; + + lockdep_assert_held(&inode->i_mapping->invalidate_lock); + folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); if (folio == ERR_PTR(-ENOENT) || (!IS_ERR(folio) && !folio_test_uptodate(folio))) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - if (!IS_ERR(folio)) { - folio_put(folio); - } else if (num_ra_pages > 1) { - filemap_invalidate_lock_shared(inode->i_mapping); - page_cache_ra_unbounded(&ractl, num_ra_pages, 0); - filemap_invalidate_unlock_shared(inode->i_mapping); - } - folio = read_mapping_folio(inode->i_mapping, index, NULL); + page_cache_ra_unbounded(&ractl, nr_pages, 0); } - if (IS_ERR(folio)) - return ERR_CAST(folio); - return folio_file_page(folio, index); + if (!IS_ERR(folio)) + folio_put(folio); } -EXPORT_SYMBOL_GPL(generic_read_merkle_tree_page); +EXPORT_SYMBOL_GPL(generic_readahead_merkle_tree); diff --git a/fs/verity/read_metadata.c b/fs/verity/read_metadata.c index 
cba5d6af4e04..b4c0892430cd 100644 --- a/fs/verity/read_metadata.c +++ b/fs/verity/read_metadata.c @@ -28,24 +28,33 @@ static int fsverity_read_merkle_tree(struct inode *inode, if (offset >= end_offset) return 0; offs_in_page = offset_in_page(offset); + index = offset >> PAGE_SHIFT; last_index = (end_offset - 1) >> PAGE_SHIFT; + /* + * Kick off readahead for the range we are going to read to ensure a + * single large sequential read instead of lots of small ones. + */ + if (inode->i_sb->s_vop->readahead_merkle_tree) { + filemap_invalidate_lock_shared(inode->i_mapping); + inode->i_sb->s_vop->readahead_merkle_tree( + inode, index, last_index - index + 1); + filemap_invalidate_unlock_shared(inode->i_mapping); + } + /* * Iterate through each Merkle tree page in the requested range and copy * the requested portion to userspace. Note that the Merkle tree block * size isn't important here, as we are returning a byte stream; i.e., * we can just work with pages even if the tree block size != PAGE_SIZE. */ - for (index = offset >> PAGE_SHIFT; index <= last_index; index++) { - unsigned long num_ra_pages = - min_t(unsigned long, last_index - index + 1, - inode->i_sb->s_bdi->io_pages); + for (; index <= last_index; index++) { unsigned int bytes_to_copy = min_t(u64, end_offset - offset, PAGE_SIZE - offs_in_page); struct page *page; const void *virt; - page = vops->read_merkle_tree_page(inode, index, num_ra_pages); + page = vops->read_merkle_tree_page(inode, index); if (IS_ERR(page)) { err = PTR_ERR(page); fsverity_err(inode, diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 86067c8b40cf..81e4c6012eb5 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -21,7 +21,6 @@ struct fsverity_pending_block { struct fsverity_verification_context { struct inode *inode; struct fsverity_info *vi; - unsigned long max_ra_pages; /* * This is the queue of data blocks that are pending verification. 
When @@ -37,6 +36,50 @@ struct fsverity_verification_context { static struct workqueue_struct *fsverity_read_workqueue; +/** + * fsverity_readahead() - kick off readahead on fsverity hashes + * @inode: inode that is being read + * @index: first file data page index that is being read + * @nr_pages: number of file data pages to be read + * + * Start readahead on the fsverity hashes that are needed to verify the file + * data in the range from @index to @index + @nr_pages (exclusive upper bound). + * + * To be called from the file systems' ->read_folio and ->readahead methods to + * ensure that the hashes are already cached on completion of the file data + * read if possible. + */ +void fsverity_readahead(struct inode *inode, pgoff_t index, + unsigned long nr_pages) +{ + const struct fsverity_info *vi = *fsverity_info_addr(inode); + const struct merkle_tree_params *params = &vi->tree_params; + u64 start_hidx = (u64)index << params->log_blocks_per_page; + u64 end_hidx = + (((u64)index + nr_pages) << params->log_blocks_per_page) - 1; + int level; + + if (!inode->i_sb->s_vop->readahead_merkle_tree) + return; + + for (level = 0; level < params->num_levels; level++) { + unsigned long level_start = params->level_start[level]; + unsigned long next_start_hidx = start_hidx >> params->log_arity; + unsigned long next_end_hidx = end_hidx >> params->log_arity; + pgoff_t start_idx = (level_start + next_start_hidx) >> + params->log_blocks_per_page; + pgoff_t end_idx = (level_start + next_end_hidx) >> + params->log_blocks_per_page; + + inode->i_sb->s_vop->readahead_merkle_tree( + inode, start_idx, end_idx - start_idx + 1); + + start_hidx = next_start_hidx; + end_hidx = next_end_hidx; + } +} +EXPORT_SYMBOL_GPL(fsverity_readahead); + /* * Returns true if the hash block with index @hblock_idx in the tree, located in * @hpage, has already been verified. 
@@ -114,8 +157,7 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage, * Return: %true if the data block is valid, else %false. */ static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, - const struct fsverity_pending_block *dblock, - unsigned long max_ra_pages) + const struct fsverity_pending_block *dblock) { const u64 data_pos = dblock->pos; const struct merkle_tree_params *params = &vi->tree_params; @@ -200,8 +242,7 @@ static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, (params->block_size - 1); hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, - hpage_idx, level == 0 ? min(max_ra_pages, - params->tree_pages - hpage_idx) : 0); + hpage_idx); if (IS_ERR(hpage)) { fsverity_err(inode, "Error %ld reading Merkle tree page %lu", @@ -272,14 +313,12 @@ static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, static void fsverity_init_verification_context(struct fsverity_verification_context *ctx, - struct inode *inode, - unsigned long max_ra_pages) + struct inode *inode) { struct fsverity_info *vi = *fsverity_info_addr(inode); ctx->inode = inode; ctx->vi = vi; - ctx->max_ra_pages = max_ra_pages; ctx->num_pending = 0; if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 && sha256_finup_2x_is_optimized()) @@ -322,8 +361,7 @@ fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx) } for (i = 0; i < ctx->num_pending; i++) { - if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i], - ctx->max_ra_pages)) + if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i])) return false; } fsverity_clear_pending_blocks(ctx); @@ -373,7 +411,7 @@ bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset) { struct fsverity_verification_context ctx; - fsverity_init_verification_context(&ctx, folio->mapping->host, 0); + fsverity_init_verification_context(&ctx, folio->mapping->host); if (fsverity_add_data_blocks(&ctx, folio, len, offset) 
&& fsverity_verify_pending_blocks(&ctx)) @@ -403,22 +441,8 @@ void fsverity_verify_bio(struct bio *bio) struct inode *inode = bio_first_folio_all(bio)->mapping->host; struct fsverity_verification_context ctx; struct folio_iter fi; - unsigned long max_ra_pages = 0; - if (bio->bi_opf & REQ_RAHEAD) { - /* - * If this bio is for data readahead, then we also do readahead - * of the first (largest) level of the Merkle tree. Namely, - * when a Merkle tree page is read, we also try to piggy-back on - * some additional pages -- up to 1/4 the number of data pages. - * - * This improves sequential read performance, as it greatly - * reduces the number of I/O requests made to the Merkle tree. - */ - max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2); - } - - fsverity_init_verification_context(&ctx, inode, max_ra_pages); + fsverity_init_verification_context(&ctx, inode); bio_for_each_folio_all(fi, bio) { if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length, diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 8ddaa87fece3..722a42754a86 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -97,10 +97,6 @@ struct fsverity_operations { * * @inode: the inode * @index: 0-based index of the page within the Merkle tree - * @num_ra_pages: The number of Merkle tree pages that should be - * prefetched starting at @index if the page at @index - * isn't already cached. Implementations may ignore this - * argument; it's only a performance optimization. * * This can be called at any time on an open verity file. It may be * called by multiple processes concurrently, even with the same page. @@ -110,8 +106,23 @@ struct fsverity_operations { * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, - pgoff_t index, - unsigned long num_ra_pages); + pgoff_t index); + + /** + * Perform readahead of a Merkle tree for the given inode. 
+ * + * @inode: the inode + * @index: 0-based index of the first page within the Merkle tree + * @nr_pages: number of pages to be read ahead. + * + * This can be called at any time on an open verity file. It may be + * called by multiple processes concurrently, even with the same range. + * + * Optional method so that ->read_merkle_tree_page preferably finds + * cached data instead of issuing dependent I/O. + */ + void (*readahead_merkle_tree)(struct inode *inode, pgoff_t index, + unsigned long nr_pages); /** * Write a Merkle tree block to the given file. @@ -308,8 +319,11 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) } void fsverity_cleanup_inode(struct inode *inode); +void fsverity_readahead(struct inode *inode, pgoff_t index, + unsigned long nr_pages); -struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index, - unsigned long num_ra_pages); +struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index); +void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index, + unsigned long nr_pages); #endif /* _LINUX_FSVERITY_H */ From 7e36e044958da7d2cbff391dcfda405f5472ae1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:34 +0100 Subject: [PATCH 11/18] fsverity: deconstify the inode pointer in struct fsverity_info A lot of file system code expects a non-const inode pointer. Dropping the const qualifier here allows using the inode pointer in verify_data_block and prepares for further argument reductions. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. 
Wong" Link: https://lore.kernel.org/r/20260202060754.270269-6-hch@lst.de Signed-off-by: Eric Biggers --- fs/verity/fsverity_private.h | 4 ++-- fs/verity/open.c | 2 +- fs/verity/verify.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index dd20b138d452..f9f3936b0a89 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -73,7 +73,7 @@ struct fsverity_info { struct merkle_tree_params tree_params; u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE]; u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE]; - const struct inode *inode; + struct inode *inode; unsigned long *hash_block_verified; }; @@ -124,7 +124,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, unsigned int log_blocksize, const u8 *salt, size_t salt_size); -struct fsverity_info *fsverity_create_info(const struct inode *inode, +struct fsverity_info *fsverity_create_info(struct inode *inode, struct fsverity_descriptor *desc); void fsverity_set_info(struct inode *inode, struct fsverity_info *vi); diff --git a/fs/verity/open.c b/fs/verity/open.c index 090cb77326ee..128502cf0a23 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -175,7 +175,7 @@ static void compute_file_digest(const struct fsverity_hash_alg *hash_alg, * appended builtin signature), and check the signature if present. The * fsverity_descriptor must have already undergone basic validation. */ -struct fsverity_info *fsverity_create_info(const struct inode *inode, +struct fsverity_info *fsverity_create_info(struct inode *inode, struct fsverity_descriptor *desc) { struct fsverity_info *vi; diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 81e4c6012eb5..31045d83ace4 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -156,9 +156,10 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage, * * Return: %true if the data block is valid, else %false. 
*/ -static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, +static bool verify_data_block(struct fsverity_info *vi, const struct fsverity_pending_block *dblock) { + struct inode *inode = vi->inode; const u64 data_pos = dblock->pos; const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; @@ -361,7 +362,7 @@ fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx) } for (i = 0; i < ctx->num_pending; i++) { - if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i])) + if (!verify_data_block(vi, &ctx->pending_blocks[i])) return false; } fsverity_clear_pending_blocks(ctx); From 47bc2ac9b63da87362ce890543d8d955e0ccc36a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:35 +0100 Subject: [PATCH 12/18] fsverity: push out fsverity_info lookup Pass a struct fsverity_info to the verification and readahead helpers, and push the lookup into the callers. Right now this is a very dumb almost mechanical move that open codes a lot of fsverity_info_addr() calls in the file systems. The subsequent patches will clean this up. This prepares for reducing the number of fsverity_info lookups, which will allow to amortize them better when using a more expensive lookup method. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. 
Wong" Acked-by: David Sterba # btrfs Link: https://lore.kernel.org/r/20260202060754.270269-7-hch@lst.de Signed-off-by: Eric Biggers --- fs/btrfs/extent_io.c | 3 ++- fs/buffer.c | 4 +++- fs/ext4/readpage.c | 14 +++++++++----- fs/f2fs/compress.c | 4 +++- fs/f2fs/data.c | 19 +++++++++++++------ fs/verity/verify.c | 24 ++++++++++++------------ include/linux/fsverity.h | 30 +++++++++++++++++++++--------- 7 files changed, 63 insertions(+), 35 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4b74023618d..21430b7d8f27 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -484,7 +484,8 @@ static bool btrfs_verify_folio(struct folio *folio, u64 start, u32 len) btrfs_folio_test_uptodate(fs_info, folio, start, len) || start >= i_size_read(folio->mapping->host)) return true; - return fsverity_verify_folio(folio); + return fsverity_verify_folio(*fsverity_info_addr(folio->mapping->host), + folio); } static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 len) diff --git a/fs/buffer.c b/fs/buffer.c index 838c0c571022..3982253b6805 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -309,9 +309,11 @@ static void verify_bh(struct work_struct *work) struct postprocess_bh_ctx *ctx = container_of(work, struct postprocess_bh_ctx, work); struct buffer_head *bh = ctx->bh; + struct inode *inode = bh->b_folio->mapping->host; bool valid; - valid = fsverity_verify_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); + valid = fsverity_verify_blocks(*fsverity_info_addr(inode), bh->b_folio, + bh->b_size, bh_offset(bh)); end_buffer_async_read(bh, valid); kfree(ctx); } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 3049a66a05c8..b0a3c9236368 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -97,6 +97,7 @@ static void verity_work(struct work_struct *work) struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; + struct inode *inode = 
bio_first_folio_all(bio)->mapping->host; /* * fsverity_verify_bio() may call readahead() again, and although verity @@ -109,7 +110,7 @@ static void verity_work(struct work_struct *work) mempool_free(ctx, bio_post_read_ctx_pool); bio->bi_private = NULL; - fsverity_verify_bio(bio); + fsverity_verify_bio(*fsverity_info_addr(inode), bio); __read_end_io(bio); } @@ -331,7 +332,9 @@ static int ext4_mpage_readpages(struct inode *inode, folio_size(folio)); if (first_hole == 0) { if (ext4_need_verity(inode, folio->index) && - !fsverity_verify_folio(folio)) + !fsverity_verify_folio( + *fsverity_info_addr(inode), + folio)) goto set_error_page; folio_end_read(folio, true); continue; @@ -409,7 +412,8 @@ int ext4_read_folio(struct file *file, struct folio *folio) } if (ext4_need_verity(inode, folio->index)) - fsverity_readahead(inode, folio->index, folio_nr_pages(folio)); + fsverity_readahead(*fsverity_info_addr(inode), folio->index, + folio_nr_pages(folio)); return ext4_mpage_readpages(inode, NULL, folio); } @@ -422,8 +426,8 @@ void ext4_readahead(struct readahead_control *rac) return; if (ext4_need_verity(inode, readahead_index(rac))) - fsverity_readahead(inode, readahead_index(rac), - readahead_count(rac)); + fsverity_readahead(*fsverity_info_addr(inode), + readahead_index(rac), readahead_count(rac)); ext4_mpage_readpages(inode, rac, NULL); } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 40a62f1dee4d..3de4a7e66959 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1814,7 +1814,9 @@ static void f2fs_verify_cluster(struct work_struct *work) if (!rpage) continue; - if (fsverity_verify_page(rpage)) + if (fsverity_verify_page( + *fsverity_info_addr(rpage->mapping->host), + rpage)) SetPageUptodate(rpage); else ClearPageUptodate(rpage); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 58d8a311ef2c..3593208c99db 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -185,15 +185,19 @@ static void f2fs_verify_bio(struct work_struct *work) 
bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; + struct fsverity_info *vi = + *fsverity_info_addr(folio->mapping->host); if (!f2fs_is_compressed_page(folio) && - !fsverity_verify_page(&folio->page)) { + !fsverity_verify_page(vi, &folio->page)) { bio->bi_status = BLK_STS_IOERR; break; } } } else { - fsverity_verify_bio(bio); + struct inode *inode = bio_first_folio_all(bio)->mapping->host; + + fsverity_verify_bio(*fsverity_info_addr(inode), bio); } f2fs_finish_read_bio(bio, true); @@ -2121,7 +2125,9 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio, zero_out: folio_zero_segment(folio, 0, folio_size(folio)); if (f2fs_need_verity(inode, index) && - !fsverity_verify_folio(folio)) { + !fsverity_verify_folio( + *fsverity_info_addr(folio->mapping->host), + folio)) { ret = -EIO; goto out; } @@ -2475,7 +2481,8 @@ static int f2fs_read_data_folio(struct file *file, struct folio *folio) } if (f2fs_need_verity(inode, folio->index)) - fsverity_readahead(inode, folio->index, folio_nr_pages(folio)); + fsverity_readahead(*fsverity_info_addr(inode), folio->index, + folio_nr_pages(folio)); return f2fs_mpage_readpages(inode, NULL, folio); } @@ -2493,8 +2500,8 @@ static void f2fs_readahead(struct readahead_control *rac) return; if (f2fs_need_verity(inode, readahead_index(rac))) - fsverity_readahead(inode, readahead_index(rac), - readahead_count(rac)); + fsverity_readahead(*fsverity_info_addr(inode), + readahead_index(rac), readahead_count(rac)); f2fs_mpage_readpages(inode, rac, NULL); } diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 31045d83ace4..37e000f01c18 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -38,7 +38,7 @@ static struct workqueue_struct *fsverity_read_workqueue; /** * fsverity_readahead() - kick off readahead on fsverity hashes - * @inode: inode that is being read + * @vi: fsverity_info for the inode to be read * @index: first file data page index that is being read * @nr_pages: number of file data 
pages to be read * @@ -49,10 +49,10 @@ static struct workqueue_struct *fsverity_read_workqueue; * ensure that the hashes are already cached on completion of the file data * read if possible. */ -void fsverity_readahead(struct inode *inode, pgoff_t index, +void fsverity_readahead(struct fsverity_info *vi, pgoff_t index, unsigned long nr_pages) { - const struct fsverity_info *vi = *fsverity_info_addr(inode); + struct inode *inode = vi->inode; const struct merkle_tree_params *params = &vi->tree_params; u64 start_hidx = (u64)index << params->log_blocks_per_page; u64 end_hidx = @@ -314,11 +314,9 @@ static bool verify_data_block(struct fsverity_info *vi, static void fsverity_init_verification_context(struct fsverity_verification_context *ctx, - struct inode *inode) + struct fsverity_info *vi) { - struct fsverity_info *vi = *fsverity_info_addr(inode); - - ctx->inode = inode; + ctx->inode = vi->inode; ctx->vi = vi; ctx->num_pending = 0; if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 && @@ -398,6 +396,7 @@ static bool fsverity_add_data_blocks(struct fsverity_verification_context *ctx, /** * fsverity_verify_blocks() - verify data in a folio + * @vi: fsverity_info for the inode to be read * @folio: the folio containing the data to verify * @len: the length of the data to verify in the folio * @offset: the offset of the data to verify in the folio @@ -408,11 +407,12 @@ static bool fsverity_add_data_blocks(struct fsverity_verification_context *ctx, * * Return: %true if the data is valid, else %false. 
*/ -bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset) +bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio, + size_t len, size_t offset) { struct fsverity_verification_context ctx; - fsverity_init_verification_context(&ctx, folio->mapping->host); + fsverity_init_verification_context(&ctx, vi); if (fsverity_add_data_blocks(&ctx, folio, len, offset) && fsverity_verify_pending_blocks(&ctx)) @@ -425,6 +425,7 @@ EXPORT_SYMBOL_GPL(fsverity_verify_blocks); #ifdef CONFIG_BLOCK /** * fsverity_verify_bio() - verify a 'read' bio that has just completed + * @vi: fsverity_info for the inode to be read * @bio: the bio to verify * * Verify the bio's data against the file's Merkle tree. All bio data segments @@ -437,13 +438,12 @@ EXPORT_SYMBOL_GPL(fsverity_verify_blocks); * filesystems) must instead call fsverity_verify_page() directly on each page. * All filesystems must also call fsverity_verify_page() on holes. */ -void fsverity_verify_bio(struct bio *bio) +void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio) { - struct inode *inode = bio_first_folio_all(bio)->mapping->host; struct fsverity_verification_context ctx; struct folio_iter fi; - fsverity_init_verification_context(&ctx, inode); + fsverity_init_verification_context(&ctx, vi); bio_for_each_folio_all(fi, bio) { if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length, diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 722a42754a86..076228a9fd12 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -197,12 +197,20 @@ int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg); /* verify.c */ -bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset); -void fsverity_verify_bio(struct bio *bio); +bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio, + size_t len, size_t offset); +void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio); void 
fsverity_enqueue_verify_work(struct work_struct *work); #else /* !CONFIG_FS_VERITY */ +/* + * Provide a stub to allow code using this to compile. All callsites should be + * guarded by compiler dead code elimination, and this forces a link error if + * not. + */ +struct fsverity_info **fsverity_info_addr(const struct inode *inode); + static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { return NULL; @@ -251,14 +259,16 @@ static inline int fsverity_ioctl_read_metadata(struct file *filp, /* verify.c */ -static inline bool fsverity_verify_blocks(struct folio *folio, size_t len, +static inline bool fsverity_verify_blocks(struct fsverity_info *vi, + struct folio *folio, size_t len, size_t offset) { WARN_ON_ONCE(1); return false; } -static inline void fsverity_verify_bio(struct bio *bio) +static inline void fsverity_verify_bio(struct fsverity_info *vi, + struct bio *bio) { WARN_ON_ONCE(1); } @@ -270,14 +280,16 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) #endif /* !CONFIG_FS_VERITY */ -static inline bool fsverity_verify_folio(struct folio *folio) +static inline bool fsverity_verify_folio(struct fsverity_info *vi, + struct folio *folio) { - return fsverity_verify_blocks(folio, folio_size(folio), 0); + return fsverity_verify_blocks(vi, folio, folio_size(folio), 0); } -static inline bool fsverity_verify_page(struct page *page) +static inline bool fsverity_verify_page(struct fsverity_info *vi, + struct page *page) { - return fsverity_verify_blocks(page_folio(page), PAGE_SIZE, 0); + return fsverity_verify_blocks(vi, page_folio(page), PAGE_SIZE, 0); } /** @@ -319,7 +331,7 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) } void fsverity_cleanup_inode(struct inode *inode); -void fsverity_readahead(struct inode *inode, pgoff_t index, +void fsverity_readahead(struct fsverity_info *vi, pgoff_t index, unsigned long nr_pages); struct page *generic_read_merkle_tree_page(struct inode *inode, 
pgoff_t index); From f6ae956dfb3402bf6d5726080c2999f107bb5dd2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:36 +0100 Subject: [PATCH 13/18] fs: consolidate fsverity_info lookup in buffer.c Look up the fsverity_info once in end_buffer_async_read_io, and then pass it along to the I/O completion workqueue in struct postprocess_bh_ctx. This amortizes the lookup better once it becomes less efficient. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20260202060754.270269-8-hch@lst.de Signed-off-by: Eric Biggers --- fs/buffer.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 3982253b6805..f4b3297ef1b1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -302,6 +302,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) struct postprocess_bh_ctx { struct work_struct work; struct buffer_head *bh; + struct fsverity_info *vi; }; static void verify_bh(struct work_struct *work) @@ -309,25 +310,14 @@ static void verify_bh(struct work_struct *work) struct postprocess_bh_ctx *ctx = container_of(work, struct postprocess_bh_ctx, work); struct buffer_head *bh = ctx->bh; - struct inode *inode = bh->b_folio->mapping->host; bool valid; - valid = fsverity_verify_blocks(*fsverity_info_addr(inode), bh->b_folio, - bh->b_size, bh_offset(bh)); + valid = fsverity_verify_blocks(ctx->vi, bh->b_folio, bh->b_size, + bh_offset(bh)); end_buffer_async_read(bh, valid); kfree(ctx); } -static bool need_fsverity(struct buffer_head *bh) -{ - struct folio *folio = bh->b_folio; - struct inode *inode = folio->mapping->host; - - return fsverity_active(inode) && - /* needed by ext4 */ - folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); -} - static void decrypt_bh(struct work_struct *work) { struct postprocess_bh_ctx *ctx = @@ -337,7 +327,7 @@ static void decrypt_bh(struct work_struct *work) err = 
fscrypt_decrypt_pagecache_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); - if (err == 0 && need_fsverity(bh)) { + if (err == 0 && ctx->vi) { /* * We use different work queues for decryption and for verity * because verity may require reading metadata pages that need @@ -359,15 +349,20 @@ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) { struct inode *inode = bh->b_folio->mapping->host; bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); - bool verify = need_fsverity(bh); + struct fsverity_info *vi = NULL; + + /* needed by ext4 */ + if (bh->b_folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE)) + vi = fsverity_get_info(inode); /* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. */ - if (uptodate && (decrypt || verify)) { + if (uptodate && (decrypt || vi)) { struct postprocess_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); if (ctx) { ctx->bh = bh; + ctx->vi = vi; if (decrypt) { INIT_WORK(&ctx->work, decrypt_bh); fscrypt_enqueue_decrypt_work(&ctx->work); From d0b57f0962adcc2a92947a9fd9b86216b5322797 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:37 +0100 Subject: [PATCH 14/18] ext4: consolidate fsverity_info lookup Look up the fsverity_info once in ext4_mpage_readpages, and then use it for the readahead, local verification of holes and pass it along to the I/O completion workqueue in struct bio_post_read_ctx. This amortizes the lookup better once it becomes less efficient. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: "Darrick J. 
Wong" Link: https://lore.kernel.org/r/20260202060754.270269-9-hch@lst.de Signed-off-by: Eric Biggers --- fs/ext4/readpage.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index b0a3c9236368..04fcb7b8f2a7 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -62,6 +62,7 @@ enum bio_post_read_step { struct bio_post_read_ctx { struct bio *bio; + struct fsverity_info *vi; struct work_struct work; unsigned int cur_step; unsigned int enabled_steps; @@ -97,7 +98,7 @@ static void verity_work(struct work_struct *work) struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; - struct inode *inode = bio_first_folio_all(bio)->mapping->host; + struct fsverity_info *vi = ctx->vi; /* * fsverity_verify_bio() may call readahead() again, and although verity @@ -110,7 +111,7 @@ static void verity_work(struct work_struct *work) mempool_free(ctx, bio_post_read_ctx_pool); bio->bi_private = NULL; - fsverity_verify_bio(*fsverity_info_addr(inode), bio); + fsverity_verify_bio(vi, bio); __read_end_io(bio); } @@ -174,22 +175,16 @@ static void mpage_end_io(struct bio *bio) __read_end_io(bio); } -static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx) -{ - return fsverity_active(inode) && - idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); -} - static void ext4_set_bio_post_read_ctx(struct bio *bio, const struct inode *inode, - pgoff_t first_idx) + struct fsverity_info *vi) { unsigned int post_read_steps = 0; if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; - if (ext4_need_verity(inode, first_idx)) + if (vi) post_read_steps |= 1 << STEP_VERITY; if (post_read_steps) { @@ -198,6 +193,7 @@ static void ext4_set_bio_post_read_ctx(struct bio *bio, mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); ctx->bio = bio; + ctx->vi = vi; ctx->enabled_steps = post_read_steps; bio->bi_private = 
ctx; } @@ -211,7 +207,7 @@ static inline loff_t ext4_readpage_limit(struct inode *inode) return i_size_read(inode); } -static int ext4_mpage_readpages(struct inode *inode, +static int ext4_mpage_readpages(struct inode *inode, struct fsverity_info *vi, struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; @@ -331,10 +327,7 @@ static int ext4_mpage_readpages(struct inode *inode, folio_zero_segment(folio, first_hole << blkbits, folio_size(folio)); if (first_hole == 0) { - if (ext4_need_verity(inode, folio->index) && - !fsverity_verify_folio( - *fsverity_info_addr(inode), - folio)) + if (vi && !fsverity_verify_folio(vi, folio)) goto set_error_page; folio_end_read(folio, true); continue; @@ -362,7 +355,7 @@ static int ext4_mpage_readpages(struct inode *inode, REQ_OP_READ, GFP_KERNEL); fscrypt_set_bio_crypt_ctx(bio, inode, next_block, GFP_KERNEL); - ext4_set_bio_post_read_ctx(bio, inode, folio->index); + ext4_set_bio_post_read_ctx(bio, inode, vi); bio->bi_iter.bi_sector = first_block << (blkbits - 9); bio->bi_end_io = mpage_end_io; if (rac) @@ -401,6 +394,7 @@ static int ext4_mpage_readpages(struct inode *inode, int ext4_read_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; + struct fsverity_info *vi = NULL; int ret; trace_ext4_read_folio(inode, folio); @@ -411,24 +405,28 @@ int ext4_read_folio(struct file *file, struct folio *folio) return ret; } - if (ext4_need_verity(inode, folio->index)) - fsverity_readahead(*fsverity_info_addr(inode), folio->index, - folio_nr_pages(folio)); - return ext4_mpage_readpages(inode, NULL, folio); + if (folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE)) + vi = fsverity_get_info(inode); + if (vi) + fsverity_readahead(vi, folio->index, folio_nr_pages(folio)); + return ext4_mpage_readpages(inode, vi, NULL, folio); } void ext4_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; + struct fsverity_info *vi = NULL; /* If the file has inline 
data, no need to do readahead. */ if (ext4_has_inline_data(inode)) return; - if (ext4_need_verity(inode, readahead_index(rac))) - fsverity_readahead(*fsverity_info_addr(inode), - readahead_index(rac), readahead_count(rac)); - ext4_mpage_readpages(inode, rac, NULL); + if (readahead_index(rac) < DIV_ROUND_UP(inode->i_size, PAGE_SIZE)) + vi = fsverity_get_info(inode); + if (vi) + fsverity_readahead(vi, readahead_index(rac), + readahead_count(rac)); + ext4_mpage_readpages(inode, vi, rac, NULL); } int __init ext4_init_post_read_processing(void) From 45dcb3ac9832650e5fdc2046cc7cdb97cebbb888 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:38 +0100 Subject: [PATCH 15/18] f2fs: consolidate fsverity_info lookup Look up the fsverity_info once in f2fs_mpage_readpages, and then use it for the readahead, local verification of holes and pass it along to the I/O completion workqueue in struct bio_post_read_ctx. Do the same thing in f2fs_get_read_data_folio for reads that come from garbage collection and other background activities. This amortizes the lookup better once it becomes less efficient. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20260202060754.270269-10-hch@lst.de Signed-off-by: Eric Biggers --- fs/f2fs/compress.c | 9 ++-- fs/f2fs/data.c | 100 +++++++++++++++++++++++++-------------------- fs/f2fs/f2fs.h | 9 +--- 3 files changed, 62 insertions(+), 56 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3de4a7e66959..ef1225af2acf 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1181,6 +1181,7 @@ int f2fs_prepare_compress_overwrite(struct inode *inode, .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, .rpages = NULL, .nr_rpages = 0, + .vi = NULL, /* can't write to fsverity files */ }; return prepare_compress_overwrite(&cc, pagep, index, fsdata); @@ -1716,7 +1717,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->nr_cpages = cc->nr_cpages; refcount_set(&dic->refcnt, 1); dic->failed = false; - dic->need_verity = f2fs_need_verity(cc->inode, start_idx); + dic->vi = cc->vi; for (i = 0; i < dic->cluster_size; i++) dic->rpages[i] = cc->rpages[i]; @@ -1814,9 +1815,7 @@ static void f2fs_verify_cluster(struct work_struct *work) if (!rpage) continue; - if (fsverity_verify_page( - *fsverity_info_addr(rpage->mapping->host), - rpage)) + if (fsverity_verify_page(dic->vi, rpage)) SetPageUptodate(rpage); else ClearPageUptodate(rpage); @@ -1835,7 +1834,7 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, { int i; - if (IS_ENABLED(CONFIG_FS_VERITY) && !failed && dic->need_verity) { + if (IS_ENABLED(CONFIG_FS_VERITY) && !failed && dic->vi) { /* * Note that to avoid deadlocks, the verity work can't be done * on the decompression workqueue. 
This is because verifying diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3593208c99db..ff061933171b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -109,6 +109,7 @@ enum bio_post_read_step { struct bio_post_read_ctx { struct bio *bio; struct f2fs_sb_info *sbi; + struct fsverity_info *vi; struct work_struct work; unsigned int enabled_steps; /* @@ -165,6 +166,7 @@ static void f2fs_verify_bio(struct work_struct *work) container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS); + struct fsverity_info *vi = ctx->vi; /* * fsverity_verify_bio() may call readahead() again, and while verity @@ -185,8 +187,6 @@ static void f2fs_verify_bio(struct work_struct *work) bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; - struct fsverity_info *vi = - *fsverity_info_addr(folio->mapping->host); if (!f2fs_is_compressed_page(folio) && !fsverity_verify_page(vi, &folio->page)) { @@ -195,9 +195,7 @@ static void f2fs_verify_bio(struct work_struct *work) } } } else { - struct inode *inode = bio_first_folio_all(bio)->mapping->host; - - fsverity_verify_bio(*fsverity_info_addr(inode), bio); + fsverity_verify_bio(vi, bio); } f2fs_finish_read_bio(bio, true); @@ -1040,7 +1038,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_up_write(&io->io_rwsem); } -static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, +static struct bio *f2fs_grab_read_bio(struct inode *inode, + struct fsverity_info *vi, block_t blkaddr, unsigned nr_pages, blk_opf_t op_flag, pgoff_t first_idx, bool for_write) { @@ -1061,7 +1060,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= STEP_DECRYPT; - if (f2fs_need_verity(inode, first_idx)) + if (vi) post_read_steps |= STEP_VERITY; /* @@ -1076,6 +1075,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, ctx = 
mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); ctx->bio = bio; ctx->sbi = sbi; + ctx->vi = vi; ctx->enabled_steps = post_read_steps; ctx->fs_blkaddr = blkaddr; ctx->decompression_attempted = false; @@ -1087,15 +1087,15 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, } /* This can handle encryption stuffs */ -static void f2fs_submit_page_read(struct inode *inode, struct folio *folio, - block_t blkaddr, blk_opf_t op_flags, - bool for_write) +static void f2fs_submit_page_read(struct inode *inode, struct fsverity_info *vi, + struct folio *folio, block_t blkaddr, + blk_opf_t op_flags, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - folio->index, for_write); + bio = f2fs_grab_read_bio(inode, vi, blkaddr, 1, op_flags, folio->index, + for_write); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); @@ -1197,6 +1197,14 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } +static inline struct fsverity_info *f2fs_need_verity(const struct inode *inode, + pgoff_t idx) +{ + if (idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE)) + return fsverity_get_info(inode); + return NULL; +} + struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs) { @@ -1262,8 +1270,8 @@ struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, return folio; } - f2fs_submit_page_read(inode, folio, dn.data_blkaddr, - op_flags, for_write); + f2fs_submit_page_read(inode, f2fs_need_verity(inode, folio->index), + folio, dn.data_blkaddr, op_flags, for_write); return folio; put_err: @@ -2067,12 +2075,12 @@ static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac) return rac ? 
REQ_RAHEAD : 0; } -static int f2fs_read_single_page(struct inode *inode, struct folio *folio, - unsigned nr_pages, - struct f2fs_map_blocks *map, - struct bio **bio_ret, - sector_t *last_block_in_bio, - struct readahead_control *rac) +static int f2fs_read_single_page(struct inode *inode, struct fsverity_info *vi, + struct folio *folio, unsigned int nr_pages, + struct f2fs_map_blocks *map, + struct bio **bio_ret, + sector_t *last_block_in_bio, + struct readahead_control *rac) { struct bio *bio = *bio_ret; const unsigned int blocksize = F2FS_BLKSIZE; @@ -2124,10 +2132,7 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio, } else { zero_out: folio_zero_segment(folio, 0, folio_size(folio)); - if (f2fs_need_verity(inode, index) && - !fsverity_verify_folio( - *fsverity_info_addr(folio->mapping->host), - folio)) { + if (vi && !fsverity_verify_folio(vi, folio)) { ret = -EIO; goto out; } @@ -2149,9 +2154,8 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio, bio = NULL; } if (bio == NULL) - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - f2fs_ra_op_flags(rac), index, - false); + bio = f2fs_grab_read_bio(inode, vi, block_nr, nr_pages, + f2fs_ra_op_flags(rac), index, false); /* * If the page is under writeback, we need to wait for @@ -2301,9 +2305,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, } if (!bio) - bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i, - f2fs_ra_op_flags(rac), - folio->index, for_write); + bio = f2fs_grab_read_bio(inode, cc->vi, blkaddr, + nr_pages - i, + f2fs_ra_op_flags(rac), + folio->index, for_write); if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; @@ -2342,7 +2347,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. 
*/ -static int f2fs_mpage_readpages(struct inode *inode, +static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi, struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; @@ -2397,6 +2402,7 @@ static int f2fs_mpage_readpages(struct inode *inode, /* there are remained compressed pages, submit them */ if (!f2fs_cluster_can_merge_page(&cc, index)) { + cc.vi = vi; ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, @@ -2430,8 +2436,9 @@ static int f2fs_mpage_readpages(struct inode *inode, read_single_page: #endif - ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map, - &bio, &last_block_in_bio, rac); + ret = f2fs_read_single_page(inode, vi, folio, max_nr_pages, + &map, &bio, &last_block_in_bio, + rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: @@ -2447,6 +2454,7 @@ static int f2fs_mpage_readpages(struct inode *inode, if (f2fs_compressed_file(inode)) { /* last page */ if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { + cc.vi = vi; ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, @@ -2464,6 +2472,7 @@ static int f2fs_mpage_readpages(struct inode *inode, static int f2fs_read_data_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; + struct fsverity_info *vi = NULL; int ret; trace_f2fs_readpage(folio, DATA); @@ -2480,15 +2489,16 @@ static int f2fs_read_data_folio(struct file *file, struct folio *folio) return ret; } - if (f2fs_need_verity(inode, folio->index)) - fsverity_readahead(*fsverity_info_addr(inode), folio->index, - folio_nr_pages(folio)); - return f2fs_mpage_readpages(inode, NULL, folio); + vi = f2fs_need_verity(inode, folio->index); + if (vi) + fsverity_readahead(vi, folio->index, folio_nr_pages(folio)); + return f2fs_mpage_readpages(inode, vi, NULL, folio); } static void f2fs_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; + struct fsverity_info *vi = NULL; 
trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac)); @@ -2499,10 +2509,11 @@ static void f2fs_readahead(struct readahead_control *rac) if (f2fs_has_inline_data(inode)) return; - if (f2fs_need_verity(inode, readahead_index(rac))) - fsverity_readahead(*fsverity_info_addr(inode), - readahead_index(rac), readahead_count(rac)); - f2fs_mpage_readpages(inode, rac, NULL); + vi = f2fs_need_verity(inode, readahead_index(rac)); + if (vi) + fsverity_readahead(vi, readahead_index(rac), + readahead_count(rac)); + f2fs_mpage_readpages(inode, vi, rac, NULL); } int f2fs_encrypt_one_page(struct f2fs_io_info *fio) @@ -3651,9 +3662,10 @@ static int f2fs_write_begin(const struct kiocb *iocb, err = -EFSCORRUPTED; goto put_folio; } - f2fs_submit_page_read(use_cow ? - F2FS_I(inode)->cow_inode : inode, - folio, blkaddr, 0, true); + f2fs_submit_page_read(use_cow ? F2FS_I(inode)->cow_inode : + inode, + NULL, /* can't write to fsverity files */ + folio, blkaddr, 0, true); folio_lock(folio); if (unlikely(folio->mapping != mapping)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20edbb99b814..f2fcadc7a6fe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1603,6 +1603,7 @@ struct compress_ctx { size_t clen; /* valid data length in cbuf */ void *private; /* payload buffer for specified compression algorithm */ void *private2; /* extra payload buffer */ + struct fsverity_info *vi; /* verity info if needed */ }; /* compress context for write IO path */ @@ -1658,7 +1659,7 @@ struct decompress_io_ctx { refcount_t refcnt; bool failed; /* IO error occurred before decompression? */ - bool need_verity; /* need fs-verity verification after decompression? 
*/ + struct fsverity_info *vi; /* fs-verity context if needed */ unsigned char compress_algorithm; /* backup algorithm type */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ @@ -4886,12 +4887,6 @@ static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, return sbi->aligned_blksize; } -static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) -{ - return fsverity_active(inode) && - idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); -} - #ifdef CONFIG_F2FS_FAULT_INJECTION extern int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, unsigned long type, enum fault_option fo); From b0160e4501bb3572d9ef6e4f8edf758193ee37aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:39 +0100 Subject: [PATCH 16/18] btrfs: consolidate fsverity_info lookup Look up the fsverity_info once in btrfs_do_readpage, and then use it for all operations performed there, and do the same in end_folio_read for all folios processed there. The latter is also changed to derive the inode from the btrfs_bio - while bbio->inode is optional, it is always set for buffered reads. This amortizes the lookup better once it becomes less efficient. 
Signed-off-by: Christoph Hellwig Acked-by: David Sterba Link: https://lore.kernel.org/r/20260202060754.270269-11-hch@lst.de Signed-off-by: Eric Biggers --- fs/btrfs/extent_io.c | 54 +++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 21430b7d8f27..24988520521c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -476,26 +476,25 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, end, page_ops); } -static bool btrfs_verify_folio(struct folio *folio, u64 start, u32 len) +static bool btrfs_verify_folio(struct fsverity_info *vi, struct folio *folio, + u64 start, u32 len) { struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); - if (!fsverity_active(folio->mapping->host) || - btrfs_folio_test_uptodate(fs_info, folio, start, len) || - start >= i_size_read(folio->mapping->host)) + if (!vi || btrfs_folio_test_uptodate(fs_info, folio, start, len)) return true; - return fsverity_verify_folio(*fsverity_info_addr(folio->mapping->host), - folio); + return fsverity_verify_folio(vi, folio); } -static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 len) +static void end_folio_read(struct fsverity_info *vi, struct folio *folio, + bool uptodate, u64 start, u32 len) { struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); ASSERT(folio_pos(folio) <= start && start + len <= folio_next_pos(folio)); - if (uptodate && btrfs_verify_folio(folio, start, len)) + if (uptodate && btrfs_verify_folio(vi, folio, start, len)) btrfs_folio_set_uptodate(fs_info, folio, start, len); else btrfs_folio_clear_uptodate(fs_info, folio, start, len); @@ -575,14 +574,19 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio) static void end_bbio_data_read(struct btrfs_bio *bbio) { struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; + struct inode *inode = &bbio->inode->vfs_inode; struct bio *bio = 
&bbio->bio; + struct fsverity_info *vi = NULL; struct folio_iter fi; ASSERT(!bio_flagged(bio, BIO_CLONED)); + + if (bbio->file_offset < i_size_read(inode)) + vi = fsverity_get_info(inode); + bio_for_each_folio_all(fi, &bbio->bio) { bool uptodate = !bio->bi_status; struct folio *folio = fi.folio; - struct inode *inode = folio->mapping->host; u64 start = folio_pos(folio) + fi.offset; btrfs_debug(fs_info, @@ -617,7 +621,7 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) } /* Update page status and unlock. */ - end_folio_read(folio, uptodate, start, fi.length); + end_folio_read(vi, folio, uptodate, start, fi.length); } bio_put(bio); } @@ -992,7 +996,8 @@ static void btrfs_readahead_expand(struct readahead_control *ractl, * return 0 on success, otherwise return error */ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, - struct btrfs_bio_ctrl *bio_ctrl) + struct btrfs_bio_ctrl *bio_ctrl, + struct fsverity_info *vi) { struct inode *inode = folio->mapping->host; struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); @@ -1030,16 +1035,16 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, ASSERT(IS_ALIGNED(cur, fs_info->sectorsize)); if (cur >= last_byte) { folio_zero_range(folio, pg_offset, end - cur + 1); - end_folio_read(folio, true, cur, end - cur + 1); + end_folio_read(vi, folio, true, cur, end - cur + 1); break; } if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) { - end_folio_read(folio, true, cur, blocksize); + end_folio_read(vi, folio, true, cur, blocksize); continue; } em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached); if (IS_ERR(em)) { - end_folio_read(folio, false, cur, end + 1 - cur); + end_folio_read(vi, folio, false, cur, end + 1 - cur); return PTR_ERR(em); } extent_offset = cur - em->start; @@ -1116,12 +1121,12 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, /* we've found a hole, just zero and go on */ if 
(block_start == EXTENT_MAP_HOLE) { folio_zero_range(folio, pg_offset, blocksize); - end_folio_read(folio, true, cur, blocksize); + end_folio_read(vi, folio, true, cur, blocksize); continue; } /* the get_extent function already copied into the folio */ if (block_start == EXTENT_MAP_INLINE) { - end_folio_read(folio, true, cur, blocksize); + end_folio_read(vi, folio, true, cur, blocksize); continue; } @@ -1318,7 +1323,8 @@ static void lock_extents_for_read(struct btrfs_inode *inode, u64 start, u64 end, int btrfs_read_folio(struct file *file, struct folio *folio) { - struct btrfs_inode *inode = folio_to_inode(folio); + struct inode *vfs_inode = folio->mapping->host; + struct btrfs_inode *inode = BTRFS_I(vfs_inode); const u64 start = folio_pos(folio); const u64 end = start + folio_size(folio) - 1; struct extent_state *cached_state = NULL; @@ -1327,10 +1333,13 @@ int btrfs_read_folio(struct file *file, struct folio *folio) .last_em_start = U64_MAX, }; struct extent_map *em_cached = NULL; + struct fsverity_info *vi = NULL; int ret; lock_extents_for_read(inode, start, end, &cached_state); - ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl); + if (folio_pos(folio) < i_size_read(vfs_inode)) + vi = fsverity_get_info(vfs_inode); + ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, vi); btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state); btrfs_free_extent_map(em_cached); @@ -2697,16 +2706,19 @@ void btrfs_readahead(struct readahead_control *rac) .last_em_start = U64_MAX, }; struct folio *folio; - struct btrfs_inode *inode = BTRFS_I(rac->mapping->host); + struct inode *vfs_inode = rac->mapping->host; + struct btrfs_inode *inode = BTRFS_I(vfs_inode); const u64 start = readahead_pos(rac); const u64 end = start + readahead_length(rac) - 1; struct extent_state *cached_state = NULL; struct extent_map *em_cached = NULL; + struct fsverity_info *vi = NULL; lock_extents_for_read(inode, start, end, &cached_state); - + if (start < i_size_read(vfs_inode)) + vi = 
fsverity_get_info(vfs_inode); while ((folio = readahead_folio(rac)) != NULL) - btrfs_do_readpage(folio, &em_cached, &bio_ctrl); + btrfs_do_readpage(folio, &em_cached, &bio_ctrl, vi); btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state); From f77f281b61183a5c0b87e6a4d101c70bd32c1c79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 07:06:40 +0100 Subject: [PATCH 17/18] fsverity: use a hashtable to find the fsverity_info Use the kernel's resizable hash table (rhashtable) to find the fsverity_info. This way file systems that want to support fsverity don't have to bloat every inode in the system with an extra pointer. The trade-off is that looking up the fsverity_info is a bit more expensive now, but the main operations are still dominated by I/O and hashing overhead. The rhashtable implementation requires no external synchronization, and the _fast versions of the APIs provide the RCU critical sections required by the implementation. Because struct fsverity_info is only removed on inode eviction and does not contain a reference count, there is no need for an extended critical section to grab a reference or validate the object state. The file open path uses rhashtable_lookup_get_insert_fast, which can either find an existing object for the hash key or insert a new one in a single atomic operation, so that concurrent opens never instantiate duplicate fsverity_info structures. FS_IOC_ENABLE_VERITY must already be synchronized by a combination of i_rwsem and file system flags and uses rhashtable_lookup_insert_fast, which errors out on an existing object for the hash key as an additional safety check. Because insertion into the hash table now happens before S_VERITY is set, fsverity just becomes a barrier and a flag check and doesn't have to look up the fsverity_info at all, so there is only a single lookup per ->read_folio or ->readahead invocation. 
For btrfs there is an additional one for each bio completion, while for ext4 and f2fs the fsverity_info is stored in the per-I/O context and reused for the completion workqueue. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Link: https://lore.kernel.org/r/20260202060754.270269-12-hch@lst.de [EB: folded in fix for missing fsverity_free_info()] Signed-off-by: Eric Biggers --- fs/btrfs/btrfs_inode.h | 4 -- fs/btrfs/inode.c | 3 -- fs/btrfs/verity.c | 2 - fs/ext4/ext4.h | 4 -- fs/ext4/super.c | 3 -- fs/ext4/verity.c | 2 - fs/f2fs/f2fs.h | 3 -- fs/f2fs/super.c | 3 -- fs/f2fs/verity.c | 2 - fs/verity/enable.c | 32 ++++++++----- fs/verity/fsverity_private.h | 16 ++++--- fs/verity/open.c | 77 +++++++++++++++++++----------- include/linux/fsverity.h | 90 ++++++++++++++---------------------- 13 files changed, 112 insertions(+), 129 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 73602ee8de3f..55c272fe5d92 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -339,10 +339,6 @@ struct btrfs_inode { struct rw_semaphore i_mmap_lock; -#ifdef CONFIG_FS_VERITY - struct fsverity_info *i_verity_info; -#endif - struct inode vfs_inode; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67c64efc5099..93b2ce75fb06 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8097,9 +8097,6 @@ static void init_once(void *foo) struct btrfs_inode *ei = foo; inode_init_once(&ei->vfs_inode); -#ifdef CONFIG_FS_VERITY - ei->i_verity_info = NULL; -#endif } void __cold btrfs_destroy_cachep(void) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index c152bef71e8b..cd96fac4739f 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -795,8 +795,6 @@ static int btrfs_write_merkle_tree_block(struct file *file, const void *buf, } const struct fsverity_operations btrfs_verityops = { - .inode_info_offs = (int)offsetof(struct btrfs_inode, i_verity_info) - - (int)offsetof(struct btrfs_inode, vfs_inode), .begin_enable_verity = 
btrfs_begin_enable_verity, .end_enable_verity = btrfs_end_enable_verity, .get_verity_descriptor = btrfs_get_verity_descriptor, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a8a448e20ef8..79c319fbf5f0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1205,10 +1205,6 @@ struct ext4_inode_info { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_inode_info *i_crypt_info; #endif - -#ifdef CONFIG_FS_VERITY - struct fsverity_info *i_verity_info; -#endif }; /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 86131f4d8718..1fb0c90c7a4b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1484,9 +1484,6 @@ static void init_once(void *foo) #ifdef CONFIG_FS_ENCRYPTION ei->i_crypt_info = NULL; #endif -#ifdef CONFIG_FS_VERITY - ei->i_verity_info = NULL; -#endif } static int __init init_inodecache(void) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 552cc5d81d94..5caa658adc12 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -380,8 +380,6 @@ static int ext4_write_merkle_tree_block(struct file *file, const void *buf, } const struct fsverity_operations ext4_verityops = { - .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_verity_info) - - (int)offsetof(struct ext4_inode_info, vfs_inode), .begin_enable_verity = ext4_begin_enable_verity, .end_enable_verity = ext4_end_enable_verity, .get_verity_descriptor = ext4_get_verity_descriptor, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f2fcadc7a6fe..8ee8a7bc012c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -974,9 +974,6 @@ struct f2fs_inode_info { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info */ #endif -#ifdef CONFIG_FS_VERITY - struct fsverity_info *i_verity_info; /* filesystem verity info */ -#endif }; static inline void get_read_extent_info(struct extent_info *ext, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c4c225e09dc4..cd00d030edda 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -504,9 +504,6 @@ static void init_once(void 
*foo) #ifdef CONFIG_FS_ENCRYPTION fi->i_crypt_info = NULL; #endif -#ifdef CONFIG_FS_VERITY - fi->i_verity_info = NULL; -#endif } #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index de2c87621319..92ebcc19cab0 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -278,8 +278,6 @@ static int f2fs_write_merkle_tree_block(struct file *file, const void *buf, } const struct fsverity_operations f2fs_verityops = { - .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_verity_info) - - (int)offsetof(struct f2fs_inode_info, vfs_inode), .begin_enable_verity = f2fs_begin_enable_verity, .end_enable_verity = f2fs_end_enable_verity, .get_verity_descriptor = f2fs_get_verity_descriptor, diff --git a/fs/verity/enable.c b/fs/verity/enable.c index c56c18e2605b..c9448074cce1 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -265,9 +265,26 @@ static int enable_verity(struct file *filp, goto rollback; } + /* + * Add the fsverity_info into the hash table before finishing the + * initialization so that we don't have to undo the enabling when memory + * allocation for the hash table fails. This is safe because looking up + * the fsverity_info always first checks the S_VERITY flag on the inode, + * which will only be set at the very end of the ->end_enable_verity + * method. + */ + err = fsverity_set_info(vi); + if (err) { + fsverity_free_info(vi); + goto rollback; + } + /* * Tell the filesystem to finish enabling verity on the file. - * Serialized with ->begin_enable_verity() by the inode lock. + * Serialized with ->begin_enable_verity() by the inode lock. The file + * system needs to set the S_VERITY flag on the inode at the very end of + * the method, at which point the fsverity information can be accessed + * by other threads. 
*/ inode_lock(inode); err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size); @@ -275,19 +292,10 @@ static int enable_verity(struct file *filp, if (err) { fsverity_err(inode, "%ps() failed with err %d", vops->end_enable_verity, err); - fsverity_free_info(vi); + fsverity_remove_info(vi); } else if (WARN_ON_ONCE(!IS_VERITY(inode))) { + fsverity_remove_info(vi); err = -EINVAL; - fsverity_free_info(vi); - } else { - /* Successfully enabled verity */ - - /* - * Readers can start using the inode's verity info immediately, - * so it can't be rolled back once set. So don't set it until - * just after the filesystem has successfully enabled verity. - */ - fsverity_set_info(inode, vi); } out: kfree(params.hashstate); diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index f9f3936b0a89..2887cb849cec 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "fs-verity: " fmt #include +#include /* * Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty; @@ -63,13 +64,14 @@ struct merkle_tree_params { * fsverity_info - cached verity metadata for an inode * * When a verity file is first opened, an instance of this struct is allocated - * and a pointer to it is stored in the file's in-memory inode. It remains - * until the inode is evicted. It caches information about the Merkle tree - * that's needed to efficiently verify data read from the file. It also caches - * the file digest. The Merkle tree pages themselves are not cached here, but - * the filesystem may cache them. + * and a pointer to it is stored in the global hash table, indexed by the inode + * pointer value. It remains alive until the inode is evicted. It caches + * information about the Merkle tree that's needed to efficiently verify data + * read from the file. It also caches the file digest. The Merkle tree pages + * themselves are not cached here, but the filesystem may cache them. 
*/ struct fsverity_info { + struct rhash_head rhash_head; struct merkle_tree_params tree_params; u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE]; u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE]; @@ -127,9 +129,9 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, struct fsverity_info *fsverity_create_info(struct inode *inode, struct fsverity_descriptor *desc); -void fsverity_set_info(struct inode *inode, struct fsverity_info *vi); - +int fsverity_set_info(struct fsverity_info *vi); void fsverity_free_info(struct fsverity_info *vi); +void fsverity_remove_info(struct fsverity_info *vi); int fsverity_get_descriptor(struct inode *inode, struct fsverity_descriptor **desc_ret); diff --git a/fs/verity/open.c b/fs/verity/open.c index 128502cf0a23..dfa0d1afe0fe 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -12,6 +12,14 @@ #include static struct kmem_cache *fsverity_info_cachep; +static struct rhashtable fsverity_info_hash; + +static const struct rhashtable_params fsverity_info_hash_params = { + .key_len = sizeof_field(struct fsverity_info, inode), + .key_offset = offsetof(struct fsverity_info, inode), + .head_offset = offsetof(struct fsverity_info, rhash_head), + .automatic_shrinking = true, +}; /** * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters @@ -241,33 +249,19 @@ struct fsverity_info *fsverity_create_info(struct inode *inode, return ERR_PTR(err); } -void fsverity_set_info(struct inode *inode, struct fsverity_info *vi) +int fsverity_set_info(struct fsverity_info *vi) { - /* - * Multiple tasks may race to set the inode's verity info pointer, so - * use cmpxchg_release(). This pairs with the smp_load_acquire() in - * fsverity_get_info(). I.e., publish the pointer with a RELEASE - * barrier so that other tasks can ACQUIRE it. - */ - if (cmpxchg_release(fsverity_info_addr(inode), NULL, vi) != NULL) { - /* Lost the race, so free the verity info we allocated. 
*/ - fsverity_free_info(vi); - /* - * Afterwards, the caller may access the inode's verity info - * directly, so make sure to ACQUIRE the winning verity info. - */ - (void)fsverity_get_info(inode); - } + return rhashtable_lookup_insert_fast(&fsverity_info_hash, + &vi->rhash_head, + fsverity_info_hash_params); } -void fsverity_free_info(struct fsverity_info *vi) +struct fsverity_info *__fsverity_get_info(const struct inode *inode) { - if (!vi) - return; - kfree(vi->tree_params.hashstate); - kvfree(vi->hash_block_verified); - kmem_cache_free(fsverity_info_cachep, vi); + return rhashtable_lookup_fast(&fsverity_info_hash, &inode, + fsverity_info_hash_params); } +EXPORT_SYMBOL_GPL(__fsverity_get_info); static bool validate_fsverity_descriptor(struct inode *inode, const struct fsverity_descriptor *desc, @@ -352,7 +346,7 @@ int fsverity_get_descriptor(struct inode *inode, static int ensure_verity_info(struct inode *inode) { - struct fsverity_info *vi = fsverity_get_info(inode); + struct fsverity_info *vi = fsverity_get_info(inode), *found; struct fsverity_descriptor *desc; int err; @@ -369,8 +363,19 @@ static int ensure_verity_info(struct inode *inode) goto out_free_desc; } - fsverity_set_info(inode, vi); - err = 0; + /* + * Multiple tasks may race to set the inode's verity info, in which case + * we might find an existing fsverity_info in the hash table. 
+ */ + found = rhashtable_lookup_get_insert_fast(&fsverity_info_hash, + &vi->rhash_head, + fsverity_info_hash_params); + if (found) { + fsverity_free_info(vi); + if (IS_ERR(found)) + err = PTR_ERR(found); + } + out_free_desc: kfree(desc); return err; @@ -384,16 +389,32 @@ int __fsverity_file_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL_GPL(__fsverity_file_open); +void fsverity_free_info(struct fsverity_info *vi) +{ + kfree(vi->tree_params.hashstate); + kvfree(vi->hash_block_verified); + kmem_cache_free(fsverity_info_cachep, vi); +} + +void fsverity_remove_info(struct fsverity_info *vi) +{ + rhashtable_remove_fast(&fsverity_info_hash, &vi->rhash_head, + fsverity_info_hash_params); + fsverity_free_info(vi); +} + void fsverity_cleanup_inode(struct inode *inode) { - struct fsverity_info **vi_addr = fsverity_info_addr(inode); + struct fsverity_info *vi = fsverity_get_info(inode); - fsverity_free_info(*vi_addr); - *vi_addr = NULL; + if (vi) + fsverity_remove_info(vi); } void __init fsverity_init_info_cache(void) { + if (rhashtable_init(&fsverity_info_hash, &fsverity_info_hash_params)) + panic("failed to initialize fsverity hash\n"); fsverity_info_cachep = KMEM_CACHE_USERCOPY( fsverity_info, SLAB_RECLAIM_ACCOUNT | SLAB_PANIC, diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 076228a9fd12..fed91023bea9 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -30,13 +30,6 @@ struct fsverity_info; /* Verity operations for filesystems */ struct fsverity_operations { - /** - * The offset of the pointer to struct fsverity_info in the - * filesystem-specific part of the inode, relative to the beginning of - * the common part of the inode (the 'struct inode'). - */ - ptrdiff_t inode_info_offs; - /** * Begin enabling verity on the given file. * @@ -142,38 +135,43 @@ struct fsverity_operations { }; #ifdef CONFIG_FS_VERITY - -/* - * Returns the address of the verity info pointer within the filesystem-specific - * part of the inode. 
(To save memory on filesystems that don't support - * fsverity, a field in 'struct inode' itself is no longer used.) +/** + * fsverity_active() - do reads from the inode need to go through fs-verity? + * @inode: inode to check + * + * This checks whether the inode's verity info has been set, and reads need + * to verify the file data. + * + * Return: true if reads need to go through fs-verity, otherwise false */ -static inline struct fsverity_info ** -fsverity_info_addr(const struct inode *inode) +static inline bool fsverity_active(const struct inode *inode) { - VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0); - return (void *)inode + inode->i_sb->s_vop->inode_info_offs; + if (IS_VERITY(inode)) { + /* + * This pairs with the try_cmpxchg in set_mask_bits() + * used to set the S_VERITY bit in i_flags. + */ + smp_mb(); + return true; + } + + return false; } +struct fsverity_info *__fsverity_get_info(const struct inode *inode); +/** + * fsverity_get_info() - get fsverity information for an inode + * @inode: inode to operate on. + * + * This gets the fsverity_info for @inode if it exists. Safe to call without + * knowing that a fsverity_info exists for @inode, including on file systems that + * do not support fsverity. + */ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { - /* - * Since this function can be called on inodes belonging to filesystems - * that don't support fsverity at all, and fsverity_info_addr() doesn't - * work on such filesystems, we have to start with an IS_VERITY() check. - * Checking IS_VERITY() here is also useful to minimize the overhead of - * fsverity_active() on non-verity files. - */ - if (!IS_VERITY(inode)) + if (!fsverity_active(inode)) return NULL; - - /* - * Pairs with the cmpxchg_release() in fsverity_set_info(). I.e., - * another task may publish the inode's verity info concurrently, - * executing a RELEASE barrier. 
Use smp_load_acquire() here to safely - * ACQUIRE the memory the other task published. - */ - return smp_load_acquire(fsverity_info_addr(inode)); + return __fsverity_get_info(inode); } /* enable.c */ @@ -204,12 +202,10 @@ void fsverity_enqueue_verify_work(struct work_struct *work); #else /* !CONFIG_FS_VERITY */ -/* - * Provide a stub to allow code using this to compile. All callsites should be - * guarded by compiler dead code elimination, and this forces a link error if - * not. - */ -struct fsverity_info **fsverity_info_addr(const struct inode *inode); +static inline bool fsverity_active(const struct inode *inode) +{ + return false; +} static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { @@ -292,24 +288,6 @@ static inline bool fsverity_verify_page(struct fsverity_info *vi, return fsverity_verify_blocks(vi, page_folio(page), PAGE_SIZE, 0); } -/** - * fsverity_active() - do reads from the inode need to go through fs-verity? - * @inode: inode to check - * - * This checks whether the inode's verity info has been set. - * - * Filesystems call this from ->readahead() to check whether the pages need to - * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to - * a race condition where the file is being read concurrently with - * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before the verity info.) - * - * Return: true if reads need to go through fs-verity, otherwise false - */ -static inline bool fsverity_active(const struct inode *inode) -{ - return fsverity_get_info(inode) != NULL; -} - /** * fsverity_file_open() - prepare to open a verity file * @inode: the inode being opened From 433fbcac9ebe491b518b21c7305fba9a748c7d2c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Feb 2026 13:33:39 -0800 Subject: [PATCH 18/18] fsverity: remove inode from fsverity_verification_ctx This field is no longer used, so remove it. 
Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20260202213339.143683-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/verify.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 37e000f01c18..31797f9b24d0 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -19,7 +19,6 @@ struct fsverity_pending_block { }; struct fsverity_verification_context { - struct inode *inode; struct fsverity_info *vi; /* @@ -316,7 +315,6 @@ static void fsverity_init_verification_context(struct fsverity_verification_context *ctx, struct fsverity_info *vi) { - ctx->inode = vi->inode; ctx->vi = vi; ctx->num_pending = 0; if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&