mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
ceph: parse subvolume_id from InodeStat v9 and store in inode
Add support for parsing the subvolume_id field from InodeStat v9 and storing it in the inode for later use by subvolume metrics tracking.

The subvolume_id identifies which CephFS subvolume an inode belongs to, enabling per-subvolume I/O metrics collection and reporting.

This patch:
- Adds subvolume_id field to struct ceph_mds_reply_info_in
- Adds i_subvolume_id field to struct ceph_inode_info
- Parses subvolume_id from v9 InodeStat in parse_reply_info_in()
- Adds ceph_inode_set_subvolume() helper to propagate the ID to inodes
- Initializes i_subvolume_id in inode allocation and clears on destroy

Signed-off-by: Alex Markuze <amarkuze@redhat.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
e58103caff
commit
4a1c543479
|
|
@ -638,6 +638,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
|
|||
|
||||
ci->i_max_bytes = 0;
|
||||
ci->i_max_files = 0;
|
||||
ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE;
|
||||
|
||||
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
|
||||
memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
|
||||
|
|
@ -742,6 +743,8 @@ void ceph_evict_inode(struct inode *inode)
|
|||
|
||||
percpu_counter_dec(&mdsc->metric.total_inodes);
|
||||
|
||||
ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE;
|
||||
|
||||
netfs_wait_for_outstanding_io(inode);
|
||||
truncate_inode_pages_final(&inode->i_data);
|
||||
if (inode_state_read_once(inode) & I_PINNING_NETFS_WB)
|
||||
|
|
@ -873,6 +876,40 @@ int ceph_fill_file_size(struct inode *inode, int issued,
|
|||
return queue_trunc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the subvolume ID for an inode.
|
||||
*
|
||||
* The subvolume_id identifies which CephFS subvolume this inode belongs to.
|
||||
* CEPH_SUBVOLUME_ID_NONE (0) means unknown/unset - the MDS only sends
|
||||
* non-zero IDs for inodes within subvolumes.
|
||||
*
|
||||
* An inode's subvolume membership is immutable - once an inode is created
|
||||
* in a subvolume, it stays there. Therefore, if we already have a valid
|
||||
* (non-zero) subvolume_id and receive a different one, that indicates a bug.
|
||||
*/
|
||||
void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id)
|
||||
{
|
||||
struct ceph_inode_info *ci;
|
||||
u64 old;
|
||||
|
||||
if (!inode || subvolume_id == CEPH_SUBVOLUME_ID_NONE)
|
||||
return;
|
||||
|
||||
ci = ceph_inode(inode);
|
||||
old = READ_ONCE(ci->i_subvolume_id);
|
||||
|
||||
if (old == subvolume_id)
|
||||
return;
|
||||
|
||||
if (old != CEPH_SUBVOLUME_ID_NONE) {
|
||||
/* subvolume_id should not change once set */
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
}
|
||||
|
||||
WRITE_ONCE(ci->i_subvolume_id, subvolume_id);
|
||||
}
|
||||
|
||||
void ceph_fill_file_time(struct inode *inode, int issued,
|
||||
u64 time_warp_seq, struct timespec64 *ctime,
|
||||
struct timespec64 *mtime, struct timespec64 *atime)
|
||||
|
|
@ -1076,6 +1113,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
|
|||
new_issued = ~issued & info_caps;
|
||||
|
||||
__ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
|
||||
ceph_inode_set_subvolume(inode, iinfo->subvolume_id);
|
||||
|
||||
#ifdef CONFIG_FS_ENCRYPTION
|
||||
if (iinfo->fscrypt_auth_len &&
|
||||
|
|
@ -1583,6 +1621,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
|||
goto done;
|
||||
}
|
||||
if (parent_dir) {
|
||||
ceph_inode_set_subvolume(parent_dir,
|
||||
rinfo->diri.subvolume_id);
|
||||
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
|
||||
rinfo->dirfrag, session, -1,
|
||||
&req->r_caps_reservation);
|
||||
|
|
@ -1671,6 +1711,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
|||
BUG_ON(!req->r_target_inode);
|
||||
|
||||
in = req->r_target_inode;
|
||||
ceph_inode_set_subvolume(in, rinfo->targeti.subvolume_id);
|
||||
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
|
||||
NULL, session,
|
||||
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
|
||||
|
|
|
|||
|
|
@ -96,19 +96,19 @@ static int parse_reply_info_quota(void **p, void *end,
|
|||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* parse individual inode info
|
||||
*/
|
||||
static int parse_reply_info_in(void **p, void *end,
|
||||
struct ceph_mds_reply_info_in *info,
|
||||
u64 features)
|
||||
u64 features,
|
||||
struct ceph_mds_client *mdsc)
|
||||
{
|
||||
int err = 0;
|
||||
u8 struct_v = 0;
|
||||
u8 struct_compat = 0;
|
||||
u32 struct_len = 0;
|
||||
|
||||
info->subvolume_id = CEPH_SUBVOLUME_ID_NONE;
|
||||
|
||||
if (features == (u64)-1) {
|
||||
u32 struct_len;
|
||||
u8 struct_compat;
|
||||
ceph_decode_8_safe(p, end, struct_v, bad);
|
||||
ceph_decode_8_safe(p, end, struct_compat, bad);
|
||||
/* struct_v is expected to be >= 1. we only understand
|
||||
|
|
@ -252,6 +252,10 @@ static int parse_reply_info_in(void **p, void *end,
|
|||
ceph_decode_skip_n(p, end, v8_struct_len, bad);
|
||||
}
|
||||
|
||||
/* struct_v 9 added subvolume_id */
|
||||
if (struct_v >= 9)
|
||||
ceph_decode_64_safe(p, end, info->subvolume_id, bad);
|
||||
|
||||
*p = end;
|
||||
} else {
|
||||
/* legacy (unversioned) struct */
|
||||
|
|
@ -384,12 +388,13 @@ static int parse_reply_info_lease(void **p, void *end,
|
|||
*/
|
||||
static int parse_reply_info_trace(void **p, void *end,
|
||||
struct ceph_mds_reply_info_parsed *info,
|
||||
u64 features)
|
||||
u64 features,
|
||||
struct ceph_mds_client *mdsc)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (info->head->is_dentry) {
|
||||
err = parse_reply_info_in(p, end, &info->diri, features);
|
||||
err = parse_reply_info_in(p, end, &info->diri, features, mdsc);
|
||||
if (err < 0)
|
||||
goto out_bad;
|
||||
|
||||
|
|
@ -409,7 +414,8 @@ static int parse_reply_info_trace(void **p, void *end,
|
|||
}
|
||||
|
||||
if (info->head->is_target) {
|
||||
err = parse_reply_info_in(p, end, &info->targeti, features);
|
||||
err = parse_reply_info_in(p, end, &info->targeti, features,
|
||||
mdsc);
|
||||
if (err < 0)
|
||||
goto out_bad;
|
||||
}
|
||||
|
|
@ -430,7 +436,8 @@ static int parse_reply_info_trace(void **p, void *end,
|
|||
*/
|
||||
static int parse_reply_info_readdir(void **p, void *end,
|
||||
struct ceph_mds_request *req,
|
||||
u64 features)
|
||||
u64 features,
|
||||
struct ceph_mds_client *mdsc)
|
||||
{
|
||||
struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
|
||||
struct ceph_client *cl = req->r_mdsc->fsc->client;
|
||||
|
|
@ -545,7 +552,7 @@ static int parse_reply_info_readdir(void **p, void *end,
|
|||
rde->name_len = oname.len;
|
||||
|
||||
/* inode */
|
||||
err = parse_reply_info_in(p, end, &rde->inode, features);
|
||||
err = parse_reply_info_in(p, end, &rde->inode, features, mdsc);
|
||||
if (err < 0)
|
||||
goto out_bad;
|
||||
/* ceph_readdir_prepopulate() will update it */
|
||||
|
|
@ -753,7 +760,8 @@ static int parse_reply_info_extra(void **p, void *end,
|
|||
if (op == CEPH_MDS_OP_GETFILELOCK)
|
||||
return parse_reply_info_filelock(p, end, info, features);
|
||||
else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
|
||||
return parse_reply_info_readdir(p, end, req, features);
|
||||
return parse_reply_info_readdir(p, end, req, features,
|
||||
req->r_mdsc);
|
||||
else if (op == CEPH_MDS_OP_CREATE)
|
||||
return parse_reply_info_create(p, end, info, features, s);
|
||||
else if (op == CEPH_MDS_OP_GETVXATTR)
|
||||
|
|
@ -782,7 +790,8 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
|
|||
ceph_decode_32_safe(&p, end, len, bad);
|
||||
if (len > 0) {
|
||||
ceph_decode_need(&p, end, len, bad);
|
||||
err = parse_reply_info_trace(&p, p+len, info, features);
|
||||
err = parse_reply_info_trace(&p, p + len, info, features,
|
||||
s->s_mdsc);
|
||||
if (err < 0)
|
||||
goto out_bad;
|
||||
}
|
||||
|
|
@ -791,7 +800,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
|
|||
ceph_decode_32_safe(&p, end, len, bad);
|
||||
if (len > 0) {
|
||||
ceph_decode_need(&p, end, len, bad);
|
||||
err = parse_reply_info_extra(&p, p+len, req, features, s);
|
||||
err = parse_reply_info_extra(&p, p + len, req, features, s);
|
||||
if (err < 0)
|
||||
goto out_bad;
|
||||
}
|
||||
|
|
@ -3989,6 +3998,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
|||
goto out_err;
|
||||
}
|
||||
req->r_target_inode = in;
|
||||
ceph_inode_set_subvolume(in, rinfo->targeti.subvolume_id);
|
||||
}
|
||||
|
||||
mutex_lock(&session->s_mutex);
|
||||
|
|
|
|||
|
|
@ -118,6 +118,7 @@ struct ceph_mds_reply_info_in {
|
|||
u32 fscrypt_file_len;
|
||||
u64 rsnaps;
|
||||
u64 change_attr;
|
||||
u64 subvolume_id;
|
||||
};
|
||||
|
||||
struct ceph_mds_reply_dir_entry {
|
||||
|
|
|
|||
|
|
@ -398,6 +398,15 @@ struct ceph_inode_info {
|
|||
/* quotas */
|
||||
u64 i_max_bytes, i_max_files;
|
||||
|
||||
/*
|
||||
* Subvolume ID this inode belongs to. CEPH_SUBVOLUME_ID_NONE (0)
|
||||
* means unknown/unset, matching the FUSE client convention.
|
||||
* Once set to a valid (non-zero) value, it should not change
|
||||
* during the inode's lifetime.
|
||||
*/
|
||||
#define CEPH_SUBVOLUME_ID_NONE 0
|
||||
u64 i_subvolume_id;
|
||||
|
||||
s32 i_dir_pin;
|
||||
|
||||
struct rb_root i_fragtree;
|
||||
|
|
@ -1069,6 +1078,7 @@ extern struct inode *ceph_get_inode(struct super_block *sb,
|
|||
extern struct inode *ceph_get_snapdir(struct inode *parent);
|
||||
extern int ceph_fill_file_size(struct inode *inode, int issued,
|
||||
u32 truncate_seq, u64 truncate_size, u64 size);
|
||||
extern void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id);
|
||||
extern void ceph_fill_file_time(struct inode *inode, int issued,
|
||||
u64 time_warp_seq, struct timespec64 *ctime,
|
||||
struct timespec64 *mtime,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user