linux/fs/ceph/subvolume_metrics.h
Alex Markuze b1137e0b3d ceph: add subvolume metrics collection and reporting
Add complete infrastructure for per-subvolume I/O metrics collection
and reporting to the MDS. This enables administrators to monitor I/O
patterns at the subvolume granularity, which is useful for multi-tenant
CephFS deployments.

This patch adds:
- CEPHFS_FEATURE_SUBVOLUME_METRICS feature flag for MDS negotiation
- CEPH_SUBVOLUME_ID_NONE constant (0) for unknown/unset state
- Red-black tree based metrics tracker for efficient per-subvolume
  aggregation with kmem_cache for entry allocations
- Wire format encoding matching the MDS C++ AggregatedIOMetrics struct
- Integration with the existing CLIENT_METRICS message
- Recording of I/O operations from file read/write and writeback paths
- Debugfs interfaces for monitoring (metrics/subvolumes, metrics/metric_features)

Metrics tracked per subvolume include:
- Read/write operation counts
- Read/write byte counts
- Read/write latency sums (for average calculation)

The metrics are periodically sent to the MDS as part of the existing
metrics reporting infrastructure when the MDS advertises support for
the SUBVOLUME_METRICS feature.

CEPH_SUBVOLUME_ID_NONE enforces subvolume_id immutability. Following
the FUSE client convention, 0 means unknown/unset. Once an inode has
a valid (non-zero) subvolume_id, it should not change during the
inode's lifetime.

Signed-off-by: Alex Markuze <amarkuze@redhat.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2026-04-22 01:40:23 +02:00

98 lines
3.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_SUBVOLUME_METRICS_H
#define _FS_CEPH_SUBVOLUME_METRICS_H
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>
#include <linux/atomic.h>
struct seq_file;
struct ceph_mds_client;
struct ceph_inode_info;
/**
 * struct ceph_subvol_metric_snapshot - counters of one subvolume, frozen at
 *                                      the moment a snapshot is taken
 * @subvolume_id: which subvolume the counters belong to; this is the inode
 *                number of the subvolume root
 * @read_ops: read operations seen since the previous snapshot
 * @write_ops: write operations seen since the previous snapshot
 * @read_bytes: bytes read since the previous snapshot
 * @write_bytes: bytes written since the previous snapshot
 * @read_latency_us: read latencies added together, in microseconds; kept as
 *                   a sum so that an average can be derived from it
 * @write_latency_us: write latencies added together, in microseconds; kept
 *                    as a sum so that an average can be derived from it
 */
struct ceph_subvol_metric_snapshot {
	u64	subvolume_id;

	/* operation counts */
	u64	read_ops;
	u64	write_ops;

	/* byte counts */
	u64	read_bytes;
	u64	write_bytes;

	/* latency sums, microseconds */
	u64	read_latency_us;
	u64	write_latency_us;
};
/**
 * struct ceph_subvolume_metrics_tracker - state for per-subvolume I/O accounting
 * @lock: serializes concurrent access to @tree and @nr_entries
 * @tree: red-black tree holding one entry per subvolume; the key is the
 *        subvolume_id
 * @nr_entries: how many entries @tree currently holds
 * @enabled: true while collection is on (needs MDS feature support); read
 *           through READ_ONCE() by ceph_subvolume_metrics_enabled()
 * @snapshot_attempts: debug counter, every ceph_subvolume_metrics_snapshot()
 *                     call
 * @snapshot_empty: debug counter, snapshots that had nothing to report
 * @snapshot_failures: debug counter, snapshots that could not allocate memory
 * @record_calls: debug counter, every ceph_subvolume_metrics_record() call
 * @record_disabled: debug counter, record calls skipped because collection
 *                   was disabled
 * @record_no_subvol: debug counter, record calls skipped for lack of a
 *                    subvolume_id
 * @total_read_ops: read ops accumulated over all snapshots; never reset
 * @total_read_bytes: bytes read accumulated over all snapshots; never reset
 * @total_write_ops: write ops accumulated over all snapshots; never reset
 * @total_write_bytes: bytes written accumulated over all snapshots; never
 *                     reset
 */
struct ceph_subvolume_metrics_tracker {
	/* per-subvolume entries and the lock guarding them */
	spinlock_t		lock;
	struct rb_root_cached	tree;
	u32			nr_entries;

	bool			enabled;

	/* debug counters: snapshot path */
	atomic64_t		snapshot_attempts;
	atomic64_t		snapshot_empty;
	atomic64_t		snapshot_failures;

	/* debug counters: record path */
	atomic64_t		record_calls;
	atomic64_t		record_disabled;
	atomic64_t		record_no_subvol;

	/* cumulative totals, never reset */
	atomic64_t		total_read_ops;
	atomic64_t		total_read_bytes;
	atomic64_t		total_write_ops;
	atomic64_t		total_write_bytes;
};
/* Tracker lifetime: set up and tear down a tracker instance. */
void ceph_subvolume_metrics_init(
		struct ceph_subvolume_metrics_tracker *tracker);
void ceph_subvolume_metrics_destroy(
		struct ceph_subvolume_metrics_tracker *tracker);

/* Turn collection on or off for @tracker. */
void ceph_subvolume_metrics_enable(
		struct ceph_subvolume_metrics_tracker *tracker,
		bool enable);

/*
 * Account a single I/O against @subvol_id.  According to the tracker's
 * debug counters, calls are skipped while collection is disabled or when
 * no subvolume_id is available.
 */
void ceph_subvolume_metrics_record(
		struct ceph_subvolume_metrics_tracker *tracker,
		u64 subvol_id, bool is_write,
		size_t size, u64 latency_us);

/*
 * Capture the current per-subvolume counters.
 * NOTE(review): presumably *@out receives an allocated array of *@nr
 * snapshots, to be released with ceph_subvolume_metrics_free_snapshot(),
 * and @consume selects whether the live counters are reset -- confirm
 * against the implementation.
 */
int ceph_subvolume_metrics_snapshot(
		struct ceph_subvolume_metrics_tracker *tracker,
		struct ceph_subvol_metric_snapshot **out,
		u32 *nr, bool consume);
void ceph_subvolume_metrics_free_snapshot(
		struct ceph_subvol_metric_snapshot *snapshot);

/* Print the tracker's state into seq_file @s. */
void ceph_subvolume_metrics_dump(
		struct ceph_subvolume_metrics_tracker *tracker,
		struct seq_file *s);

/*
 * Inode-level entry point: takes the I/O's @start and @end timestamps
 * rather than a precomputed latency.
 */
void ceph_subvolume_metrics_record_io(
		struct ceph_mds_client *mdsc,
		struct ceph_inode_info *ci,
		bool is_write, size_t bytes,
		ktime_t start, ktime_t end);
/**
 * ceph_subvolume_metrics_enabled - sample the tracker's collection flag
 * @tracker: tracker to query; dereferenced without a NULL check
 *
 * Loads @tracker->enabled with READ_ONCE(); no lock is taken here.
 *
 * Return: the current value of @tracker->enabled.
 */
static inline bool
ceph_subvolume_metrics_enabled(const struct ceph_subvolume_metrics_tracker *tracker)
{
	return READ_ONCE(tracker->enabled);
}
/*
 * Setup and teardown of a cache shared by all trackers; presumably the
 * kmem_cache used for tracker entry allocations -- confirm against the
 * implementation.
 *
 * NOTE(review): __init is used here, but this header does not include
 * <linux/init.h> directly; presumably it arrives through one of the
 * includes above -- confirm, or include it explicitly.
 */
int __init ceph_subvolume_metrics_cache_init(void);
void ceph_subvolume_metrics_cache_destroy(void);
#endif /* _FS_CEPH_SUBVOLUME_METRICS_H */