mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 07:33:19 +02:00
After a recent fsmark benchmarking run, I observed that the overhead of
parent pointers on file creation and deletion can be a bit high. On a
machine with 20 CPUs, 128G of memory, and an NVME SSD capable of pushing
750000iops, I see the following results:
$ mkfs.xfs -f -l logdev=/dev/nvme1n1,size=1g /dev/nvme0n1 -n parent=0
meta-data=/dev/nvme0n1 isize=512 agcount=40, agsize=9767586 blks
= sectsz=4096 attr=2, projid32bit=1
= crc=1 finobt=1, sparse=1, rmapbt=1
= reflink=1 bigtime=1 inobtcount=1 nrext64=1
= exchange=0 metadir=0
data = bsize=4096 blocks=390703440, imaxpct=5
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0, ftype=1, parent=0
log =/dev/nvme1n1 bsize=4096 blocks=262144, version=2
= sectsz=4096 sunit=1 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
= rgcount=0 rgsize=0 extents
= zoned=0 start=0 reserved=0
So we created 40 AGs, one per CPU. Now we create 40 directories and run
fsmark:
$ time fs_mark -D 10000 -S 0 -n 100000 -s 0 -L 8 -d ...
# Version 3.3, 40 thread(s) starting at Wed Dec 10 14:22:07 2025
# Sync method: NO SYNC: Test does not issue sync() or fsync() calls.
# Directories: Time based hash between directories across 10000 subdirectories with 180 seconds per subdirectory.
# File names: 40 bytes long, (16 initial bytes of time stamp with 24 random bytes at end of name)
# Files info: size 0 bytes, written with an IO size of 16384 bytes per write
# App overhead is time in microseconds spent in the test not doing file writing related system calls.
parent=0 parent=1
================== ==================
real 0m57.573s real 1m2.934s
user 3m53.578s user 3m53.508s
sys 19m44.440s sys 25m14.810s
$ time rm -rf ...
parent=0 parent=1
================== ==================
real 0m59.649s real 1m12.505s
user 0m41.196s user 0m47.489s
sys 13m9.566s sys 20m33.844s
Parent pointers increase the system time by 28% when creating 32
million files that are totally empty. Removing them incurs a system
time increase of 56%. Wall time increases by 9% and 22%.
For most filesystems, each file tends to have a single owner and not
that many xattrs. If the xattr structure is shortform, then all xattr
changes are logged with the inode and do not require the xattr
intent mechanism to persist the parent pointer.
Therefore, we can speed up parent pointer operations by calling the
shortform xattr functions directly if the child's xattr is in short
format. Now the overhead looks like:
$ time fs_mark -D 10000 -S 0 -n 100000 -s 0 -L 8 -d ...
parent=0 parent=1
================== ==================
real 0m58.030s real 1m0.983s
user 3m54.141s user 3m53.758s
sys 19m57.003s sys 21m30.605s
$ time rm -rf ...
parent=0 parent=1
================== ==================
real 0m58.911s real 1m4.420s
user 0m41.329s user 0m45.169s
sys 13m27.857s sys 15m58.564s
Now parent pointers only increase the system time by 8% for creation and
19% for deletion. Wall time increases by 5% and 9% now.
Close the performance gap by creating helpers for the attr set, remove,
and replace operations that will try to make direct shortform updates,
and fall back to the attr intent machinery if that doesn't work. This
works for regular xattrs and for parent pointers.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
382 lines
9.7 KiB
C
382 lines
9.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2022-2024 Oracle.
|
|
* All rights reserved.
|
|
*/
|
|
#include "xfs_platform.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_da_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_bmap_btree.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_error.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_da_btree.h"
|
|
#include "xfs_attr.h"
|
|
#include "xfs_dir2.h"
|
|
#include "xfs_dir2_priv.h"
|
|
#include "xfs_attr_sf.h"
|
|
#include "xfs_bmap.h"
|
|
#include "xfs_defer.h"
|
|
#include "xfs_log.h"
|
|
#include "xfs_xattr.h"
|
|
#include "xfs_parent.h"
|
|
#include "xfs_trans_space.h"
|
|
#include "xfs_attr_item.h"
|
|
#include "xfs_health.h"
|
|
#include "xfs_attr_leaf.h"
|
|
|
|
/*
 * Slab cache handle exported from this file.
 * NOTE(review): presumably backs struct xfs_parent_args allocations; it is
 * neither created nor used in the code visible here -- confirm with callers.
 */
struct kmem_cache *xfs_parent_args_cache;
|
|
|
|
/*
|
|
* Parent pointer attribute handling.
|
|
*
|
|
* Because the attribute name is a filename component, it will never be longer
|
|
* than 255 bytes and must not contain nulls or slashes. These are roughly the
|
|
* same constraints that apply to attribute names.
|
|
*
|
|
* The attribute value must always be a struct xfs_parent_rec. This means the
|
|
* attribute will never be in remote format because 12 bytes is nowhere near
|
|
* xfs_attr_leaf_entsize_local_max() (~75% of block size).
|
|
*
|
|
* Creating a new parent attribute will always create a new attribute - there
|
|
* should never, ever be an existing attribute in the tree for a new inode.
|
|
* ENOSPC behavior is problematic - creating the inode without the parent
|
|
* pointer is effectively a corruption, so we allow parent attribute creation
|
|
* to dip into the reserve block pool to avoid unexpected ENOSPC errors from
|
|
* occurring.
|
|
*/
|
|
|
|
/* Return true if parent pointer attr name is valid. */
|
|
bool
|
|
xfs_parent_namecheck(
|
|
unsigned int attr_flags,
|
|
const void *name,
|
|
size_t length)
|
|
{
|
|
/*
|
|
* Parent pointers always use logged operations, so there should never
|
|
* be incomplete xattrs.
|
|
*/
|
|
if (attr_flags & XFS_ATTR_INCOMPLETE)
|
|
return false;
|
|
|
|
return xfs_dir2_namecheck(name, length);
|
|
}
|
|
|
|
/* Return true if parent pointer attr value is valid. */
|
|
bool
|
|
xfs_parent_valuecheck(
|
|
struct xfs_mount *mp,
|
|
const void *value,
|
|
size_t valuelen)
|
|
{
|
|
const struct xfs_parent_rec *rec = value;
|
|
|
|
if (!xfs_has_parent(mp))
|
|
return false;
|
|
|
|
/* The xattr value must be a parent record. */
|
|
if (valuelen != sizeof(struct xfs_parent_rec))
|
|
return false;
|
|
|
|
/* The parent record must be local. */
|
|
if (value == NULL)
|
|
return false;
|
|
|
|
/* The parent inumber must be valid. */
|
|
if (!xfs_verify_dir_ino(mp, be64_to_cpu(rec->p_ino)))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Compute the attribute name hash for a parent pointer. */
|
|
xfs_dahash_t
|
|
xfs_parent_hashval(
|
|
struct xfs_mount *mp,
|
|
const uint8_t *name,
|
|
int namelen,
|
|
xfs_ino_t parent_ino)
|
|
{
|
|
struct xfs_name xname = {
|
|
.name = name,
|
|
.len = namelen,
|
|
};
|
|
|
|
/*
|
|
* Use the same dirent name hash as would be used on the directory, but
|
|
* mix in the parent inode number to avoid collisions on hardlinked
|
|
* files with identical names but different parents.
|
|
*/
|
|
return xfs_dir2_hashname(mp, &xname) ^
|
|
upper_32_bits(parent_ino) ^ lower_32_bits(parent_ino);
|
|
}
|
|
|
|
/* Compute the attribute name hash from the xattr components. */
|
|
xfs_dahash_t
|
|
xfs_parent_hashattr(
|
|
struct xfs_mount *mp,
|
|
const uint8_t *name,
|
|
int namelen,
|
|
const void *value,
|
|
int valuelen)
|
|
{
|
|
const struct xfs_parent_rec *rec = value;
|
|
|
|
/* Requires a local attr value in xfs_parent_rec format */
|
|
if (valuelen != sizeof(struct xfs_parent_rec)) {
|
|
ASSERT(valuelen == sizeof(struct xfs_parent_rec));
|
|
return 0;
|
|
}
|
|
|
|
if (!value) {
|
|
ASSERT(value != NULL);
|
|
return 0;
|
|
}
|
|
|
|
return xfs_parent_hashval(mp, name, namelen, be64_to_cpu(rec->p_ino));
|
|
}
|
|
|
|
/*
|
|
* Initialize the parent pointer arguments structure. Caller must have zeroed
|
|
* the contents of @args. @tp is only required for updates.
|
|
*/
|
|
static void
|
|
xfs_parent_da_args_init(
|
|
struct xfs_da_args *args,
|
|
struct xfs_trans *tp,
|
|
struct xfs_parent_rec *rec,
|
|
struct xfs_inode *child,
|
|
xfs_ino_t owner,
|
|
const struct xfs_name *parent_name)
|
|
{
|
|
args->geo = child->i_mount->m_attr_geo;
|
|
args->whichfork = XFS_ATTR_FORK;
|
|
args->attr_filter = XFS_ATTR_PARENT;
|
|
args->op_flags = XFS_DA_OP_LOGGED | XFS_DA_OP_OKNOENT;
|
|
args->trans = tp;
|
|
args->dp = child;
|
|
args->owner = owner;
|
|
args->name = parent_name->name;
|
|
args->namelen = parent_name->len;
|
|
args->value = rec;
|
|
args->valuelen = sizeof(struct xfs_parent_rec);
|
|
xfs_attr_sethash(args);
|
|
}
|
|
|
|
/* Make sure the incore state is ready for a parent pointer query/update. */
|
|
static inline int
|
|
xfs_parent_iread_extents(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *child)
|
|
{
|
|
/* Parent pointers require that the attr fork must exist. */
|
|
if (XFS_IS_CORRUPT(child->i_mount, !xfs_inode_has_attr_fork(child))) {
|
|
xfs_inode_mark_sick(child, XFS_SICK_INO_PARENT);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
return xfs_iread_extents(tp, child, XFS_ATTR_FORK);
|
|
}
|
|
|
|
/* Add a parent pointer to reflect a dirent addition. */
|
|
int
|
|
xfs_parent_addname(
|
|
struct xfs_trans *tp,
|
|
struct xfs_parent_args *ppargs,
|
|
struct xfs_inode *dp,
|
|
const struct xfs_name *parent_name,
|
|
struct xfs_inode *child)
|
|
{
|
|
int error;
|
|
|
|
error = xfs_parent_iread_extents(tp, child);
|
|
if (error)
|
|
return error;
|
|
|
|
xfs_inode_to_parent_rec(&ppargs->rec, dp);
|
|
xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
|
|
child->i_ino, parent_name);
|
|
|
|
return xfs_attr_setname(&ppargs->args, 0);
|
|
}
|
|
|
|
/* Remove a parent pointer to reflect a dirent removal. */
|
|
int
|
|
xfs_parent_removename(
|
|
struct xfs_trans *tp,
|
|
struct xfs_parent_args *ppargs,
|
|
struct xfs_inode *dp,
|
|
const struct xfs_name *parent_name,
|
|
struct xfs_inode *child)
|
|
{
|
|
int error;
|
|
|
|
error = xfs_parent_iread_extents(tp, child);
|
|
if (error)
|
|
return error;
|
|
|
|
xfs_inode_to_parent_rec(&ppargs->rec, dp);
|
|
xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
|
|
child->i_ino, parent_name);
|
|
|
|
return xfs_attr_removename(&ppargs->args);
|
|
}
|
|
|
|
/* Replace one parent pointer with another to reflect a rename. */
|
|
int
|
|
xfs_parent_replacename(
|
|
struct xfs_trans *tp,
|
|
struct xfs_parent_args *ppargs,
|
|
struct xfs_inode *old_dp,
|
|
const struct xfs_name *old_name,
|
|
struct xfs_inode *new_dp,
|
|
const struct xfs_name *new_name,
|
|
struct xfs_inode *child)
|
|
{
|
|
int error;
|
|
|
|
error = xfs_parent_iread_extents(tp, child);
|
|
if (error)
|
|
return error;
|
|
|
|
xfs_inode_to_parent_rec(&ppargs->rec, old_dp);
|
|
xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
|
|
child->i_ino, old_name);
|
|
|
|
xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp);
|
|
|
|
ppargs->args.new_name = new_name->name;
|
|
ppargs->args.new_namelen = new_name->len;
|
|
ppargs->args.new_value = &ppargs->new_rec;
|
|
ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec);
|
|
|
|
return xfs_attr_replacename(&ppargs->args, 0);
|
|
}
|
|
|
|
/*
|
|
* Extract parent pointer information from any parent pointer xattr into
|
|
* @parent_ino/gen. The last two parameters can be NULL pointers.
|
|
*
|
|
* Returns 0 if this is not a parent pointer xattr at all; or -EFSCORRUPTED for
|
|
* garbage.
|
|
*/
|
|
int
|
|
xfs_parent_from_attr(
|
|
struct xfs_mount *mp,
|
|
unsigned int attr_flags,
|
|
const unsigned char *name,
|
|
unsigned int namelen,
|
|
const void *value,
|
|
unsigned int valuelen,
|
|
xfs_ino_t *parent_ino,
|
|
uint32_t *parent_gen)
|
|
{
|
|
const struct xfs_parent_rec *rec = value;
|
|
|
|
ASSERT(attr_flags & XFS_ATTR_PARENT);
|
|
|
|
if (!xfs_parent_namecheck(attr_flags, name, namelen))
|
|
return -EFSCORRUPTED;
|
|
if (!xfs_parent_valuecheck(mp, value, valuelen))
|
|
return -EFSCORRUPTED;
|
|
|
|
if (parent_ino)
|
|
*parent_ino = be64_to_cpu(rec->p_ino);
|
|
if (parent_gen)
|
|
*parent_gen = be32_to_cpu(rec->p_gen);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Look up a parent pointer record (@parent_name -> @pptr) of @ip.
|
|
*
|
|
* Caller must hold at least ILOCK_SHARED. The scratchpad need not be
|
|
* initialized.
|
|
*
|
|
* Returns 0 if the pointer is found, -ENOATTR if there is no match, or a
|
|
* negative errno.
|
|
*/
|
|
int
|
|
xfs_parent_lookup(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip,
|
|
const struct xfs_name *parent_name,
|
|
struct xfs_parent_rec *pptr,
|
|
struct xfs_da_args *scratch)
|
|
{
|
|
memset(scratch, 0, sizeof(struct xfs_da_args));
|
|
xfs_parent_da_args_init(scratch, tp, pptr, ip, ip->i_ino, parent_name);
|
|
return xfs_attr_get_ilocked(scratch);
|
|
}
|
|
|
|
/* Sanity-check a parent pointer before we try to perform repairs. */
|
|
static inline bool
|
|
xfs_parent_sanity_check(
|
|
struct xfs_mount *mp,
|
|
const struct xfs_name *parent_name,
|
|
const struct xfs_parent_rec *pptr)
|
|
{
|
|
if (!xfs_parent_namecheck(XFS_ATTR_PARENT, parent_name->name,
|
|
parent_name->len))
|
|
return false;
|
|
|
|
if (!xfs_parent_valuecheck(mp, pptr, sizeof(*pptr)))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* Attach the parent pointer (@parent_name -> @pptr) to @ip immediately.
|
|
* Caller must not have a transaction or hold the ILOCK. This is for
|
|
* specialized repair functions only. The scratchpad need not be initialized.
|
|
*/
|
|
int
|
|
xfs_parent_set(
|
|
struct xfs_inode *ip,
|
|
xfs_ino_t owner,
|
|
const struct xfs_name *parent_name,
|
|
struct xfs_parent_rec *pptr,
|
|
struct xfs_da_args *scratch)
|
|
{
|
|
if (!xfs_parent_sanity_check(ip->i_mount, parent_name, pptr)) {
|
|
ASSERT(0);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
memset(scratch, 0, sizeof(struct xfs_da_args));
|
|
xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner, parent_name);
|
|
return xfs_attr_set(scratch, XFS_ATTRUPDATE_CREATE, false);
|
|
}
|
|
|
|
/*
|
|
* Remove the parent pointer (@parent_name -> @pptr) from @ip immediately.
|
|
* Caller must not have a transaction or hold the ILOCK. This is for
|
|
* specialized repair functions only. The scratchpad need not be initialized.
|
|
*/
|
|
int
|
|
xfs_parent_unset(
|
|
struct xfs_inode *ip,
|
|
xfs_ino_t owner,
|
|
const struct xfs_name *parent_name,
|
|
struct xfs_parent_rec *pptr,
|
|
struct xfs_da_args *scratch)
|
|
{
|
|
if (!xfs_parent_sanity_check(ip->i_mount, parent_name, pptr)) {
|
|
ASSERT(0);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
memset(scratch, 0, sizeof(struct xfs_da_args));
|
|
xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner, parent_name);
|
|
return xfs_attr_set(scratch, XFS_ATTRUPDATE_REMOVE, false);
|
|
}
|