fsverity: use a hashtable to find the fsverity_info

Use the kernel's resizable hash table (rhashtable) to find the
fsverity_info.  This way file systems that want to support fsverity don't
have to bloat every inode in the system with an extra pointer.  The
trade-off is that looking up the fsverity_info is a bit more expensive
now, but the main operations are still dominated by I/O and hashing
overhead.

The rhashtable implementation requires no external synchronization, and
the _fast versions of the APIs provide the RCU critical sections required
by the implementation.  Because struct fsverity_info is only removed on
inode eviction and does not contain a reference count, there is no need
for an extended critical section to grab a reference or validate the
object state.  The file open path uses rhashtable_lookup_get_insert_fast,
which can either find an existing object for the hash key or insert a
new one in a single atomic operation, so that concurrent opens never
instantiate duplicate fsverity_info structures.  FS_IOC_ENABLE_VERITY must
already be synchronized by a combination of i_rwsem and file system flags
and uses rhashtable_lookup_insert_fast, which errors out on an existing
object for the hash key as an additional safety check.

Because insertion into the hash table now happens before S_VERITY is set,
fsverity_active() just becomes a barrier and a flag check and doesn't have
to look up the fsverity_info at all, so there is only a single lookup per
->read_folio or ->readahead invocation.  For btrfs there is an additional
one for each bio completion, while for ext4 and f2fs the fsverity_info
is stored in the per-I/O context and reused for the completion workqueue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Link: https://lore.kernel.org/r/20260202060754.270269-12-hch@lst.de
[EB: folded in fix for missing fsverity_free_info()]
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Christoph Hellwig 2026-02-02 07:06:40 +01:00 committed by Eric Biggers
parent b0160e4501
commit f77f281b61
13 changed files with 112 additions and 129 deletions

View File

@ -339,10 +339,6 @@ struct btrfs_inode {
struct rw_semaphore i_mmap_lock;
#ifdef CONFIG_FS_VERITY
struct fsverity_info *i_verity_info;
#endif
struct inode vfs_inode;
};

View File

@ -8097,9 +8097,6 @@ static void init_once(void *foo)
struct btrfs_inode *ei = foo;
inode_init_once(&ei->vfs_inode);
#ifdef CONFIG_FS_VERITY
ei->i_verity_info = NULL;
#endif
}
void __cold btrfs_destroy_cachep(void)

View File

@ -795,8 +795,6 @@ static int btrfs_write_merkle_tree_block(struct file *file, const void *buf,
}
const struct fsverity_operations btrfs_verityops = {
.inode_info_offs = (int)offsetof(struct btrfs_inode, i_verity_info) -
(int)offsetof(struct btrfs_inode, vfs_inode),
.begin_enable_verity = btrfs_begin_enable_verity,
.end_enable_verity = btrfs_end_enable_verity,
.get_verity_descriptor = btrfs_get_verity_descriptor,

View File

@ -1205,10 +1205,6 @@ struct ext4_inode_info {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_inode_info *i_crypt_info;
#endif
#ifdef CONFIG_FS_VERITY
struct fsverity_info *i_verity_info;
#endif
};
/*

View File

@ -1484,9 +1484,6 @@ static void init_once(void *foo)
#ifdef CONFIG_FS_ENCRYPTION
ei->i_crypt_info = NULL;
#endif
#ifdef CONFIG_FS_VERITY
ei->i_verity_info = NULL;
#endif
}
static int __init init_inodecache(void)

View File

@ -380,8 +380,6 @@ static int ext4_write_merkle_tree_block(struct file *file, const void *buf,
}
const struct fsverity_operations ext4_verityops = {
.inode_info_offs = (int)offsetof(struct ext4_inode_info, i_verity_info) -
(int)offsetof(struct ext4_inode_info, vfs_inode),
.begin_enable_verity = ext4_begin_enable_verity,
.end_enable_verity = ext4_end_enable_verity,
.get_verity_descriptor = ext4_get_verity_descriptor,

View File

@ -974,9 +974,6 @@ struct f2fs_inode_info {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info */
#endif
#ifdef CONFIG_FS_VERITY
struct fsverity_info *i_verity_info; /* filesystem verity info */
#endif
};
static inline void get_read_extent_info(struct extent_info *ext,

View File

@ -504,9 +504,6 @@ static void init_once(void *foo)
#ifdef CONFIG_FS_ENCRYPTION
fi->i_crypt_info = NULL;
#endif
#ifdef CONFIG_FS_VERITY
fi->i_verity_info = NULL;
#endif
}
#ifdef CONFIG_QUOTA

View File

@ -278,8 +278,6 @@ static int f2fs_write_merkle_tree_block(struct file *file, const void *buf,
}
const struct fsverity_operations f2fs_verityops = {
.inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_verity_info) -
(int)offsetof(struct f2fs_inode_info, vfs_inode),
.begin_enable_verity = f2fs_begin_enable_verity,
.end_enable_verity = f2fs_end_enable_verity,
.get_verity_descriptor = f2fs_get_verity_descriptor,

View File

@ -265,9 +265,26 @@ static int enable_verity(struct file *filp,
goto rollback;
}
/*
* Add the fsverity_info into the hash table before finishing the
* initialization so that we don't have to undo the enabling when memory
* allocation for the hash table fails. This is safe because looking up
* the fsverity_info always first checks the S_VERITY flag on the inode,
* which will only be set at the very end of the ->end_enable_verity
* method.
*/
err = fsverity_set_info(vi);
if (err) {
fsverity_free_info(vi);
goto rollback;
}
/*
* Tell the filesystem to finish enabling verity on the file.
* Serialized with ->begin_enable_verity() by the inode lock.
* Serialized with ->begin_enable_verity() by the inode lock. The file
* system needs to set the S_VERITY flag on the inode at the very end of
* the method, at which point the fsverity information can be accessed
* by other threads.
*/
inode_lock(inode);
err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
@ -275,19 +292,10 @@ static int enable_verity(struct file *filp,
if (err) {
fsverity_err(inode, "%ps() failed with err %d",
vops->end_enable_verity, err);
fsverity_free_info(vi);
fsverity_remove_info(vi);
} else if (WARN_ON_ONCE(!IS_VERITY(inode))) {
fsverity_remove_info(vi);
err = -EINVAL;
fsverity_free_info(vi);
} else {
/* Successfully enabled verity */
/*
* Readers can start using the inode's verity info immediately,
* so it can't be rolled back once set. So don't set it until
* just after the filesystem has successfully enabled verity.
*/
fsverity_set_info(inode, vi);
}
out:
kfree(params.hashstate);

View File

@ -11,6 +11,7 @@
#define pr_fmt(fmt) "fs-verity: " fmt
#include <linux/fsverity.h>
#include <linux/rhashtable.h>
/*
* Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty;
@ -63,13 +64,14 @@ struct merkle_tree_params {
* fsverity_info - cached verity metadata for an inode
*
* When a verity file is first opened, an instance of this struct is allocated
* and a pointer to it is stored in the file's in-memory inode. It remains
* until the inode is evicted. It caches information about the Merkle tree
* that's needed to efficiently verify data read from the file. It also caches
* the file digest. The Merkle tree pages themselves are not cached here, but
* the filesystem may cache them.
* and a pointer to it is stored in the global hash table, indexed by the inode
* pointer value. It remains alive until the inode is evicted. It caches
* information about the Merkle tree that's needed to efficiently verify data
* read from the file. It also caches the file digest. The Merkle tree pages
* themselves are not cached here, but the filesystem may cache them.
*/
struct fsverity_info {
struct rhash_head rhash_head;
struct merkle_tree_params tree_params;
u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE];
@ -127,9 +129,9 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
struct fsverity_info *fsverity_create_info(struct inode *inode,
struct fsverity_descriptor *desc);
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
int fsverity_set_info(struct fsverity_info *vi);
void fsverity_free_info(struct fsverity_info *vi);
void fsverity_remove_info(struct fsverity_info *vi);
int fsverity_get_descriptor(struct inode *inode,
struct fsverity_descriptor **desc_ret);

View File

@ -12,6 +12,14 @@
#include <linux/slab.h>
static struct kmem_cache *fsverity_info_cachep;
static struct rhashtable fsverity_info_hash;
/*
 * rhashtable parameters for fsverity_info_hash.  Entries are struct
 * fsverity_info objects, keyed by the contents of their 'inode' field and
 * linked into the table through their 'rhash_head' field.  Automatic
 * shrinking of the table is enabled.
 */
static const struct rhashtable_params fsverity_info_hash_params = {
.key_len = sizeof_field(struct fsverity_info, inode),
.key_offset = offsetof(struct fsverity_info, inode),
.head_offset = offsetof(struct fsverity_info, rhash_head),
.automatic_shrinking = true,
};
/**
* fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
@ -241,33 +249,19 @@ struct fsverity_info *fsverity_create_info(struct inode *inode,
return ERR_PTR(err);
}
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
int fsverity_set_info(struct fsverity_info *vi)
{
/*
* Multiple tasks may race to set the inode's verity info pointer, so
* use cmpxchg_release(). This pairs with the smp_load_acquire() in
* fsverity_get_info(). I.e., publish the pointer with a RELEASE
* barrier so that other tasks can ACQUIRE it.
*/
if (cmpxchg_release(fsverity_info_addr(inode), NULL, vi) != NULL) {
/* Lost the race, so free the verity info we allocated. */
fsverity_free_info(vi);
/*
* Afterwards, the caller may access the inode's verity info
* directly, so make sure to ACQUIRE the winning verity info.
*/
(void)fsverity_get_info(inode);
}
return rhashtable_lookup_insert_fast(&fsverity_info_hash,
&vi->rhash_head,
fsverity_info_hash_params);
}
void fsverity_free_info(struct fsverity_info *vi)
struct fsverity_info *__fsverity_get_info(const struct inode *inode)
{
if (!vi)
return;
kfree(vi->tree_params.hashstate);
kvfree(vi->hash_block_verified);
kmem_cache_free(fsverity_info_cachep, vi);
return rhashtable_lookup_fast(&fsverity_info_hash, &inode,
fsverity_info_hash_params);
}
EXPORT_SYMBOL_GPL(__fsverity_get_info);
static bool validate_fsverity_descriptor(struct inode *inode,
const struct fsverity_descriptor *desc,
@ -352,7 +346,7 @@ int fsverity_get_descriptor(struct inode *inode,
static int ensure_verity_info(struct inode *inode)
{
struct fsverity_info *vi = fsverity_get_info(inode);
struct fsverity_info *vi = fsverity_get_info(inode), *found;
struct fsverity_descriptor *desc;
int err;
@ -369,8 +363,19 @@ static int ensure_verity_info(struct inode *inode)
goto out_free_desc;
}
fsverity_set_info(inode, vi);
err = 0;
/*
* Multiple tasks may race to set the inode's verity info, in which case
* we might find an existing fsverity_info in the hash table.
*/
found = rhashtable_lookup_get_insert_fast(&fsverity_info_hash,
&vi->rhash_head,
fsverity_info_hash_params);
if (found) {
fsverity_free_info(vi);
if (IS_ERR(found))
err = PTR_ERR(found);
}
out_free_desc:
kfree(desc);
return err;
@ -384,16 +389,32 @@ int __fsverity_file_open(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL_GPL(__fsverity_file_open);
/*
 * Free @vi together with the tree_params.hashstate and hash_block_verified
 * allocations it points to.  @vi is dereferenced unconditionally, so it must
 * not be NULL.  This does not remove @vi from fsverity_info_hash; use
 * fsverity_remove_info() for an object that has been inserted there.
 */
void fsverity_free_info(struct fsverity_info *vi)
{
kfree(vi->tree_params.hashstate);
kvfree(vi->hash_block_verified);
kmem_cache_free(fsverity_info_cachep, vi);
}
/*
 * Remove @vi from fsverity_info_hash and then free it.  The return value of
 * rhashtable_remove_fast() is not checked.
 */
void fsverity_remove_info(struct fsverity_info *vi)
{
rhashtable_remove_fast(&fsverity_info_hash, &vi->rhash_head,
fsverity_info_hash_params);
fsverity_free_info(vi);
}
void fsverity_cleanup_inode(struct inode *inode)
{
struct fsverity_info **vi_addr = fsverity_info_addr(inode);
struct fsverity_info *vi = fsverity_get_info(inode);
fsverity_free_info(*vi_addr);
*vi_addr = NULL;
if (vi)
fsverity_remove_info(vi);
}
void __init fsverity_init_info_cache(void)
{
if (rhashtable_init(&fsverity_info_hash, &fsverity_info_hash_params))
panic("failed to initialize fsverity hash\n");
fsverity_info_cachep = KMEM_CACHE_USERCOPY(
fsverity_info,
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,

View File

@ -30,13 +30,6 @@ struct fsverity_info;
/* Verity operations for filesystems */
struct fsverity_operations {
/**
* The offset of the pointer to struct fsverity_info in the
* filesystem-specific part of the inode, relative to the beginning of
* the common part of the inode (the 'struct inode').
*/
ptrdiff_t inode_info_offs;
/**
* Begin enabling verity on the given file.
*
@ -142,38 +135,43 @@ struct fsverity_operations {
};
#ifdef CONFIG_FS_VERITY
/*
* Returns the address of the verity info pointer within the filesystem-specific
* part of the inode. (To save memory on filesystems that don't support
* fsverity, a field in 'struct inode' itself is no longer used.)
/**
* fsverity_active() - do reads from the inode need to go through fs-verity?
* @inode: inode to check
*
* This checks whether the inode's verity info has been set, and reads need
* to verify the file data.
*
* Return: true if reads need to go through fs-verity, otherwise false
*/
static inline struct fsverity_info **
fsverity_info_addr(const struct inode *inode)
static inline bool fsverity_active(const struct inode *inode)
{
VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0);
return (void *)inode + inode->i_sb->s_vop->inode_info_offs;
if (IS_VERITY(inode)) {
/*
* This pairs with the try_cmpxchg in set_mask_bits()
* used to set the S_VERITY bit in i_flags.
*/
smp_mb();
return true;
}
return false;
}
struct fsverity_info *__fsverity_get_info(const struct inode *inode);
/**
* fsverity_get_info - get fsverity information for an inode
* @inode: inode to operate on.
*
* This gets the fsverity_info for @inode if it exists. Safe to call without
* knowing that a fsverity_info exists for @inode, including on file systems that
* do not support fsverity.
*/
static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
/*
* Since this function can be called on inodes belonging to filesystems
* that don't support fsverity at all, and fsverity_info_addr() doesn't
* work on such filesystems, we have to start with an IS_VERITY() check.
* Checking IS_VERITY() here is also useful to minimize the overhead of
* fsverity_active() on non-verity files.
*/
if (!IS_VERITY(inode))
if (!fsverity_active(inode))
return NULL;
/*
* Pairs with the cmpxchg_release() in fsverity_set_info(). I.e.,
* another task may publish the inode's verity info concurrently,
* executing a RELEASE barrier. Use smp_load_acquire() here to safely
* ACQUIRE the memory the other task published.
*/
return smp_load_acquire(fsverity_info_addr(inode));
return __fsverity_get_info(inode);
}
/* enable.c */
@ -204,12 +202,10 @@ void fsverity_enqueue_verify_work(struct work_struct *work);
#else /* !CONFIG_FS_VERITY */
/*
* Provide a stub to allow code using this to compile. All callsites should be
* guarded by compiler dead code elimination, and this forces a link error if
* not.
*/
struct fsverity_info **fsverity_info_addr(const struct inode *inode);
static inline bool fsverity_active(const struct inode *inode)
{
return false;
}
static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
@ -292,24 +288,6 @@ static inline bool fsverity_verify_page(struct fsverity_info *vi,
return fsverity_verify_blocks(vi, page_folio(page), PAGE_SIZE, 0);
}
/**
* fsverity_active() - do reads from the inode need to go through fs-verity?
* @inode: inode to check
*
* This checks whether the inode's verity info has been set.
*
* Filesystems call this from ->readahead() to check whether the pages need to
* be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
* a race condition where the file is being read concurrently with
* FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before the verity info.)
*
* Return: true if reads need to go through fs-verity, otherwise false
*/
static inline bool fsverity_active(const struct inode *inode)
{
return fsverity_get_info(inode) != NULL;
}
/**
* fsverity_file_open() - prepare to open a verity file
* @inode: the inode being opened