overlayfs updates for 6.18

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE9zuTYTs0RXF+Ke33EVvVyTe/1WoFAmjSe0kACgkQEVvVyTe/
 1Wo3hA/+KeDBSGjFS2NrdskthbXpYNid0blue/Rp2F12kqWEUQdRj95CMrgwjZXd
 o6oTwyDK2ieBCkgnmW8xHVrWArFp2eKqoBfDauxhK7ZuJzFxiTxlMbra6kSrhHxy
 J2lTZtcdlsWdwpJ8xYKv2Hs79BVOfP2rpQWql3WgWKJD0aiW/QFKx7pKVh4YLlnV
 C5xqOKBsMRQRwrTKnhL00nG5yKMGJ6REKNaGk8PT+XIuNuHXGfFs90dC8xIGi3pu
 DT/ZKYYj2Cnsj+lYXWKKI2NPmtr/LS7iYhKqIahmLPlA2gMtACbP6VTuh/aG9GH+
 KkcPRUpZAxUIqyR8dZ2X5P0SnSCQfIEjh1TTQqdfIas+7WIoTSyAx8P3kSetXZuJ
 gGYsd2JlCnNqNcZ23NZVm7oRLv/6v744Lgd6qURXlE7uWj5fSSvBDT37FH1zmZ09
 i5VTL5gtG0Zo70BNXys0b/ScnQDnFmlsSG2HK3EVXofvxPtFhQ451oDdweNswidQ
 mIggyn1cbMmFYextNF/vYmns1k5hBgrdpdNl5JBI5RnZkpHr1xKQpquoQ8CEzx48
 PMNBXgUtT7r4zKB2WkQOleXnffmEIOcl1C1sq059UR+eCbg76Td3C3qy3rFYaJ4y
 BRDEsWEAvBQ5vww+P6j0Wx2sML46m2/hH4c1HUzc790yRace01M=
 =RCDE
 -----END PGP SIGNATURE-----

Merge tag 'ovl-update-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs

Pull overlayfs updates from Amir Goldstein:

 - Work by André Almeida to support case-insensitive overlayfs

   In the underlying case-insensitive filesystems, casefolding is enabled
   per directory, but for overlayfs it is all-or-nothing: it supports
   layers where all directories are casefolded (with the same encoding)
   or layers where no directories are casefolded.

 - A fix for a "bug" in Neil's ovl directory lock changes, which only
   manifested itself with casefold-enabled layers, as these may return
   an unhashed negative dentry from lookup.

* tag 'ovl-update-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs:
  ovl: make sure that ovl_create_real() returns a hashed dentry
  ovl: Support mounting case-insensitive enabled layers
  ovl: Check for casefold consistency when creating new dentries
  ovl: Add S_CASEFOLD as part of the inode flag to be copied
  ovl: Set case-insensitive dentry operations for ovl sb
  ovl: Ensure that all layers have the same encoding
  ovl: Create ovl_casefold() to support casefolded strncmp()
  ovl: Prepare for mounting case-insensitive enabled layers
  fs: Create sb_same_encoding() helper
  fs: Create sb_encoding() helper
This commit is contained in:
Linus Torvalds 2025-10-03 12:14:24 -07:00
commit cf06d791f8
12 changed files with 255 additions and 44 deletions

View File

@ -670,7 +670,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
if (err)
return err;
if (inode->i_flags & OVL_COPY_I_FLAGS_MASK &&
if (inode->i_flags & OVL_FATTR_I_FLAGS_MASK &&
(S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) {
/*
* Copy the fileattr inode flags that are the source of already

View File

@ -187,6 +187,13 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent,
/* mkdir is special... */
newdentry = ovl_do_mkdir(ofs, dir, newdentry, attr->mode);
err = PTR_ERR_OR_ZERO(newdentry);
/* expect to inherit casefolding from workdir/upperdir */
if (!err && ofs->casefold != ovl_dentry_casefolded(newdentry)) {
pr_warn_ratelimited("wrong inherited casefold (%pd2)\n",
newdentry);
dput(newdentry);
err = -EINVAL;
}
break;
case S_IFCHR:
@ -205,12 +212,32 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent,
err = -EPERM;
}
}
if (!err && WARN_ON(!newdentry->d_inode)) {
if (err)
goto out;
if (WARN_ON(!newdentry->d_inode)) {
/*
* Not quite sure if non-instantiated dentry is legal or not.
* VFS doesn't seem to care so check and warn here.
*/
err = -EIO;
} else if (d_unhashed(newdentry)) {
struct dentry *d;
/*
* Some filesystems (i.e. casefolded) may return an unhashed
* negative dentry from the ovl_lookup_upper() call before
* ovl_create_real().
* In that case, lookup again after making the newdentry
* positive, so ovl_create_upper() always returns a hashed
* positive dentry.
*/
d = ovl_lookup_upper(ofs, newdentry->d_name.name, parent,
newdentry->d_name.len);
dput(newdentry);
if (IS_ERR_OR_NULL(d))
err = d ? PTR_ERR(d) : -ENOENT;
else
return d;
}
out:
if (err) {

View File

@ -1277,6 +1277,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
}
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
ovl_inode_init(inode, oip, ino, fsid);
WARN_ON_ONCE(!!IS_CASEFOLDED(inode) != ofs->casefold);
if (upperdentry && ovl_is_impuredir(sb, upperdentry))
ovl_set_flag(OVL_IMPURE, inode);

View File

@ -239,13 +239,14 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
char val;
/*
* We allow filesystems that are case-folding capable but deny composing
* ovl stack from case-folded directories. If someone has enabled case
* folding on a directory on underlying layer, the warranty of the ovl
* stack is voided.
* We allow filesystems that are case-folding capable as long as the
* layers are consistently enabled in the stack, enabled for every dir
* or disabled in all dirs. If someone has modified case folding on a
* directory on underlying layer, the warranty of the ovl stack is
* voided.
*/
if (ovl_dentry_casefolded(base)) {
warn = "case folded parent";
if (ofs->casefold != ovl_dentry_casefolded(base)) {
warn = "parent wrong casefold";
err = -ESTALE;
goto out_warn;
}
@ -259,8 +260,8 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto out_err;
}
if (ovl_dentry_casefolded(this)) {
warn = "case folded child";
if (ofs->casefold != ovl_dentry_casefolded(this)) {
warn = "child wrong casefold";
err = -EREMOTE;
goto out_warn;
}

View File

@ -820,10 +820,12 @@ struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip);
void ovl_copyattr(struct inode *to);
/* vfs fileattr flags read from overlay.protattr xattr to ovl inode */
#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE)
/* vfs fileattr flags copied from real to ovl inode */
#define OVL_FATTR_I_FLAGS_MASK (OVL_PROT_I_FLAGS_MASK | S_SYNC | S_NOATIME)
/* vfs inode flags copied from real to ovl inode */
#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE)
/* vfs inode flags read from overlay.protattr xattr to ovl inode */
#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE)
#define OVL_COPY_I_FLAGS_MASK (OVL_FATTR_I_FLAGS_MASK | S_CASEFOLD)
/*
* fileattr flags copied from lower to upper inode on copy up.

View File

@ -91,6 +91,7 @@ struct ovl_fs {
struct mutex whiteout_lock;
/* r/o snapshot of upperdir sb's only taken on volatile mounts */
errseq_t errseq;
bool casefold;
};
/* Number of lower layers, not including data-only layers */

View File

@ -276,17 +276,26 @@ static int ovl_mount_dir(const char *name, struct path *path)
static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path,
enum ovl_opt layer, const char *name, bool upper)
{
bool is_casefolded = ovl_dentry_casefolded(path->dentry);
struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs *ofs = fc->s_fs_info;
if (!d_is_dir(path->dentry))
return invalfc(fc, "%s is not a directory", name);
/*
* Allow filesystems that are case-folding capable but deny composing
* ovl stack from case-folded directories.
* ovl stack from inconsistent case-folded directories.
*/
if (ovl_dentry_casefolded(path->dentry))
return invalfc(fc, "case-insensitive directory on %s not supported", name);
if (!ctx->casefold_set) {
ofs->casefold = is_casefolded;
ctx->casefold_set = true;
}
if (ofs->casefold != is_casefolded) {
return invalfc(fc, "case-%ssensitive directory on %s is inconsistent",
is_casefolded ? "in" : "", name);
}
if (ovl_dentry_weird(path->dentry))
return invalfc(fc, "filesystem on %s not supported", name);

View File

@ -33,6 +33,7 @@ struct ovl_fs_context {
struct ovl_opt_set set;
struct ovl_fs_context_layer *lower;
char *lowerdir_all; /* user provided lowerdir string */
bool casefold_set;
};
int ovl_init_fs_context(struct fs_context *fc);

View File

@ -27,6 +27,8 @@ struct ovl_cache_entry {
bool is_upper;
bool is_whiteout;
bool check_xwhiteout;
const char *c_name;
int c_len;
char name[];
};
@ -45,6 +47,7 @@ struct ovl_readdir_data {
struct list_head *list;
struct list_head middle;
struct ovl_cache_entry *first_maybe_whiteout;
struct unicode_map *map;
int count;
int err;
bool is_upper;
@ -66,6 +69,31 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
return rb_entry(n, struct ovl_cache_entry, node);
}
/*
 * Produce the casefolded form of @str (length @len) using rdd->map.
 *
 * Returns 0 without touching @dst when folding does not apply: CONFIG_UNICODE
 * is disabled, no unicode map is set on @rdd, or the name is "." or "..".
 * Returns -ENOMEM (and records it in rdd->err) if the buffer cannot be
 * allocated.  Otherwise returns the result of utf8_casefold(): when that is
 * positive, *@dst points to a NAME_MAX sized buffer holding the folded name
 * and ownership of the buffer passes to the caller; when it is zero or
 * negative the buffer is freed here and @dst is left untouched.
 */
static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
			char **dst)
{
	const struct qstr qstr = { .name = str, .len = len };
	char *buf;
	int folded_len;

	if (!IS_ENABLED(CONFIG_UNICODE))
		return 0;
	if (!rdd->map)
		return 0;
	if (is_dot_dotdot(str, len))
		return 0;

	buf = kmalloc(NAME_MAX, GFP_KERNEL);
	if (!buf) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	folded_len = utf8_casefold(rdd->map, &qstr, buf, NAME_MAX);
	if (folded_len <= 0) {
		/* Nothing usable was produced; do not hand the buffer out */
		kfree(buf);
		return folded_len;
	}

	*dst = buf;
	return folded_len;
}
static bool ovl_cache_entry_find_link(const char *name, int len,
struct rb_node ***link,
struct rb_node **parent)
@ -79,10 +107,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
*parent = *newp;
tmp = ovl_cache_entry_from_node(*newp);
cmp = strncmp(name, tmp->name, len);
cmp = strncmp(name, tmp->c_name, len);
if (cmp > 0)
newp = &tmp->node.rb_right;
else if (cmp < 0 || len < tmp->len)
else if (cmp < 0 || len < tmp->c_len)
newp = &tmp->node.rb_left;
else
found = true;
@ -101,10 +129,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
while (node) {
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
cmp = strncmp(name, p->name, len);
cmp = strncmp(name, p->c_name, len);
if (cmp > 0)
node = p->node.rb_right;
else if (cmp < 0 || len < p->len)
else if (cmp < 0 || len < p->c_len)
node = p->node.rb_left;
else
return p;
@ -145,6 +173,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
const char *name, int len,
const char *c_name, int c_len,
u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
@ -167,6 +196,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
/* Defer check for overlay.whiteout to ovl_iterate() */
p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
if (c_name && c_name != name) {
p->c_name = c_name;
p->c_len = c_len;
} else {
p->c_name = p->name;
p->c_len = len;
}
if (d_type == DT_CHR) {
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
rdd->first_maybe_whiteout = p;
@ -174,48 +211,62 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
return p;
}
static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
const char *name, int len, u64 ino,
/* Return 0 for found, 1 for added, <0 for error */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
const char *name, int len,
const char *c_name, int c_len,
u64 ino,
unsigned int d_type)
{
struct rb_node **newp = &rdd->root->rb_node;
struct rb_node *parent = NULL;
struct ovl_cache_entry *p;
if (ovl_cache_entry_find_link(name, len, &newp, &parent))
return true;
if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
return 0;
p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
return false;
return -ENOMEM;
}
list_add_tail(&p->l_node, rdd->list);
rb_link_node(&p->node, parent, newp);
rb_insert_color(&p->node, rdd->root);
return true;
return 1;
}
static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
/* Return 0 for found, 1 for added, <0 for error */
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
const char *name, int namelen,
const char *c_name, int c_len,
loff_t offset, u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
p = ovl_cache_entry_find(rdd->root, name, namelen);
p = ovl_cache_entry_find(rdd->root, c_name, c_len);
if (p) {
list_move_tail(&p->l_node, &rdd->middle);
return 0;
} else {
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
ino, d_type);
if (p == NULL)
rdd->err = -ENOMEM;
else
list_add_tail(&p->l_node, &rdd->middle);
}
return rdd->err == 0;
return rdd->err ?: 1;
}
/*
 * Free a cache entry.  c_name either aliases the entry's own name[] storage
 * or points to a separately allocated buffer; only the latter is freed on
 * its own before the entry itself is released.
 */
static void ovl_cache_entry_free(struct ovl_cache_entry *p)
{
	const char *folded = p->c_name;

	if (folded != p->name)
		kfree(folded);

	kfree(p);
}
void ovl_cache_free(struct list_head *list)
@ -224,7 +275,7 @@ void ovl_cache_free(struct list_head *list)
struct ovl_cache_entry *n;
list_for_each_entry_safe(p, n, list, l_node)
kfree(p);
ovl_cache_entry_free(p);
INIT_LIST_HEAD(list);
}
@ -260,12 +311,39 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
{
struct ovl_readdir_data *rdd =
container_of(ctx, struct ovl_readdir_data, ctx);
struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
const char *c_name = NULL;
char *cf_name = NULL;
int c_len = 0, ret;
if (ofs->casefold)
c_len = ovl_casefold(rdd, name, namelen, &cf_name);
if (rdd->err)
return false;
if (c_len <= 0) {
c_name = name;
c_len = namelen;
} else {
c_name = cf_name;
}
rdd->count++;
if (!rdd->is_lowest)
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
else
return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
/*
* If ret == 1, that means that c_name is being used as part of struct
* ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
* c_name was found in the rb-tree so we can free it here.
*/
if (ret != 1 && c_name != name)
kfree(c_name);
return ret >= 0;
}
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
@ -357,12 +435,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
.list = list,
.root = root,
.is_lowest = false,
.map = NULL,
};
int idx, next;
const struct ovl_layer *layer;
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
for (idx = 0; idx != -1; idx = next) {
next = ovl_path_next(idx, dentry, &realpath, &layer);
if (ofs->casefold)
rdd.map = sb_encoding(realpath.dentry->d_sb);
rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
ovl_dentry_has_xwhiteouts(dentry);
@ -555,7 +639,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
container_of(ctx, struct ovl_readdir_data, ctx);
rdd->count++;
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
return false;
@ -595,7 +679,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
}
if (p->ino == p->real_ino) {
list_del(&p->l_node);
kfree(p);
ovl_cache_entry_free(p);
} else {
struct rb_node **newp = &root->rb_node;
struct rb_node *parent = NULL;
@ -1023,7 +1107,7 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
del_entry:
list_del(&p->l_node);
kfree(p);
ovl_cache_entry_free(p);
}
return err;

View File

@ -161,6 +161,16 @@ static const struct dentry_operations ovl_dentry_operations = {
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
#if IS_ENABLED(CONFIG_UNICODE)
/*
 * Dentry operations installed by ovl_set_d_op() when ofs->casefold is set:
 * the overlay d_real/revalidate callbacks plus the generic case-insensitive
 * name hash and compare helpers.
 */
static const struct dentry_operations ovl_dentry_ci_operations = {
	.d_hash = generic_ci_d_hash,
	.d_compare = generic_ci_d_compare,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
	.d_real = ovl_d_real,
};
#endif
static struct kmem_cache *ovl_inode_cachep;
static struct inode *ovl_alloc_inode(struct super_block *sb)
@ -991,6 +1001,25 @@ static int ovl_get_data_fsid(struct ovl_fs *ofs)
return ofs->numfs;
}
/*
 * Make the ovl sb use the same encoding (and encoding flags) as @fs_sb,
 * the sb of the first layer.
 *
 * Returns 0 if @fs_sb has no encoding (nothing is copied) or after the
 * encoding was copied, and -EINVAL if @fs_sb uses strict encoding, which is
 * not supported.  Without CONFIG_UNICODE the check and the copy are compiled
 * out and 0 is always returned.
 */
static int ovl_set_encoding(struct super_block *sb, struct super_block *fs_sb)
{
	if (sb_has_encoding(fs_sb)) {
#if IS_ENABLED(CONFIG_UNICODE)
		if (sb_has_strict_encoding(fs_sb)) {
			pr_err("strict encoding not supported\n");
			return -EINVAL;
		}

		sb->s_encoding = fs_sb->s_encoding;
		sb->s_encoding_flags = fs_sb->s_encoding_flags;
#endif
	}

	return 0;
}
static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
struct ovl_fs_context *ctx, struct ovl_layer *layers)
@ -1024,6 +1053,12 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
if (ovl_upper_mnt(ofs)) {
ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
ofs->fs[0].is_lower = false;
if (ofs->casefold) {
err = ovl_set_encoding(sb, ofs->fs[0].sb);
if (err)
return err;
}
}
nr_merged_lower = ctx->nr - ctx->nr_data;
@ -1083,6 +1118,19 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
l->name = NULL;
ofs->numlayer++;
ofs->fs[fsid].is_lower = true;
if (ofs->casefold) {
if (!ovl_upper_mnt(ofs) && !sb_has_encoding(sb)) {
err = ovl_set_encoding(sb, ofs->fs[fsid].sb);
if (err)
return err;
}
if (!sb_same_encoding(sb, mnt->mnt_sb)) {
pr_err("all layers must have the same encoding\n");
return -EINVAL;
}
}
}
/*
@ -1300,6 +1348,7 @@ static struct dentry *ovl_get_root(struct super_block *sb,
ovl_dentry_set_flag(OVL_E_CONNECTED, root);
ovl_set_upperdata(d_inode(root));
ovl_inode_init(d_inode(root), &oip, ino, fsid);
WARN_ON(!!IS_CASEFOLDED(d_inode(root)) != ofs->casefold);
ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE);
/* root keeps a reference of upperdentry */
dget(upperdentry);
@ -1307,6 +1356,19 @@ static struct dentry *ovl_get_root(struct super_block *sb,
return root;
}
static void ovl_set_d_op(struct super_block *sb)
{
#if IS_ENABLED(CONFIG_UNICODE)
struct ovl_fs *ofs = sb->s_fs_info;
if (ofs->casefold) {
set_default_d_op(sb, &ovl_dentry_ci_operations);
return;
}
#endif
set_default_d_op(sb, &ovl_dentry_operations);
}
int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct ovl_fs *ofs = sb->s_fs_info;
@ -1322,7 +1384,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
if (WARN_ON(fc->user_ns != current_user_ns()))
goto out_err;
set_default_d_op(sb, &ovl_dentry_operations);
ovl_set_d_op(sb);
err = -ENOMEM;
if (!ofs->creator_cred)

View File

@ -210,11 +210,11 @@ bool ovl_dentry_weird(struct dentry *dentry)
return true;
/*
* Allow filesystems that are case-folding capable but deny composing
* ovl stack from case-folded directories.
* Exceptionally for layers with casefold, we accept that they have
* their own hash and compare operations
*/
if (sb_has_encoding(dentry->d_sb))
return IS_CASEFOLDED(d_inode(dentry));
return false;
return dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE);
}

View File

@ -3739,12 +3739,35 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir,
}
#endif
static inline bool sb_has_encoding(const struct super_block *sb)
static inline struct unicode_map *sb_encoding(const struct super_block *sb)
{
#if IS_ENABLED(CONFIG_UNICODE)
return !!sb->s_encoding;
return sb->s_encoding;
#else
return false;
return NULL;
#endif
}
/* Return true if @sb has an encoding set, i.e. sb_encoding() is non-NULL */
static inline bool sb_has_encoding(const struct super_block *sb)
{
	return sb_encoding(sb) != NULL;
}
/*
 * Check whether two super blocks have the same encoding and flags.
 *
 * When both point to the very same encoding (including both having none)
 * they are considered equal without looking at the flags.  Otherwise both
 * must have an encoding, and the encoding versions and the sb encoding
 * flags must match.  Without CONFIG_UNICODE this always returns true.
 */
static inline bool sb_same_encoding(const struct super_block *sb1,
				    const struct super_block *sb2)
{
#if IS_ENABLED(CONFIG_UNICODE)
	const struct unicode_map *um1 = sb1->s_encoding;
	const struct unicode_map *um2 = sb2->s_encoding;

	if (um1 == um2)
		return true;

	/* Exactly one side has an encoding */
	if (!um1 || !um2)
		return false;

	if (um1->version != um2->version)
		return false;

	return sb1->s_encoding_flags == sb2->s_encoding_flags;
#else
	return true;
#endif
}