From 7412dee9f1fd3e224202b633fdfa6eeaebe0307e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 16 Apr 2021 11:43:47 +0200 Subject: [PATCH 1/6] mmc: meson-gx: replace WARN_ONCE with dev_warn_once about scatterlist size alignment in block mode Since commit e085b51c74cc ("mmc: meson-gx: check for scatterlist size alignment in block mode"), support for SDIO SD_IO_RW_EXTENDED transferts are properly filtered but some driver like brcmfmac still gives a block sg buffer size not aligned with SDIO block, triggerring a WARN_ONCE() with scary stacktrace even if the transfer works fine but with possible degraded performances. Simply replace with dev_warn_once() to inform user this should be fixed to avoid degraded performance. This should be ultimately fixed in brcmfmac, but since it's only a performance issue the warning should be removed. Fixes: e085b51c74cc ("mmc: meson-gx: check for scatterlist size alignment in block mode") Reported-by: Marek Szyprowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210416094347.2015896-1-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index eb6c02bc4a02..b8b771b643cc 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -247,8 +247,9 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc, */ for_each_sg(data->sg, sg, data->sg_len, i) { if (sg->length % data->blksz) { - WARN_ONCE(1, "unaligned sg len %u blksize %u\n", - sg->length, data->blksz); + dev_warn_once(mmc_dev(mmc), + "unaligned sg len %u blksize %u, disabling descriptor DMA for transfer\n", + sg->length, data->blksz); return; } } From 0e1e71d34901a633825cd5ae78efaf8abd9215c6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Apr 2021 14:23:12 -0400 Subject: [PATCH 2/6] tracing: Fix checking event hash pointer logic when tp_printk is enabled Pointers in events that are printed are unhashed if the flags allow it, and the logic to do so is called before processing the event output from the raw ring buffer. In most cases, this is done when a user reads one of the trace files. But if tp_printk is added on the kernel command line, this logic is done for trace events when they are triggered, and their output goes out via printk. The unhash logic (and even the validation of the output) did not support the tp_printk output, and would crash. Link: https://lore.kernel.org/linux-tegra/9835d9f1-8d3a-3440-c53f-516c2606ad07@nvidia.com/ Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments") Reported-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c777627212f..c0c9aa5cd8e2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3545,7 +3545,11 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) { char *tmp; - if (iter->fmt == static_fmt_buf) + /* + * iter->tr is NULL when used with tp_printk, which makes + * this get called where it is not safe to call krealloc(). + */ + if (!iter->tr || iter->fmt == static_fmt_buf) return NULL; tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, @@ -3566,7 +3570,7 @@ const char *trace_event_format(struct trace_iterator *iter, const char *fmt) if (WARN_ON_ONCE(!fmt)) return fmt; - if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR) return fmt; p = fmt; @@ -9692,7 +9696,7 @@ void __init early_trace_init(void) { if (tracepoint_printk) { tracepoint_print_iter = - kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); + kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); if (MEM_FAIL(!tracepoint_print_iter, "Failed to allocate trace iterator\n")) tracepoint_printk = 0; From db2e718a47984b9d71ed890eb2ea36ecf150de18 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 20 Apr 2021 08:43:34 -0500 Subject: [PATCH 3/6] capabilities: require CAP_SETFCAP to map uid 0 cap_setfcap is required to create file capabilities. Since commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities"), a process running as uid 0 but without cap_setfcap is able to work around this as follows: unshare a new user namespace which maps parent uid 0 into the child namespace. While this task will not have new capabilities against the parent namespace, there is a loophole due to the way namespaced file capabilities are represented as xattrs. File capabilities valid in userns 1 are distinguished from file capabilities valid in userns 2 by the kuid which underlies uid 0. Therefore the restricted root process can unshare a new self-mapping namespace, add a namespaced file capability onto a file, then use that file capability in the parent namespace. To prevent that, do not allow mapping parent uid 0 if the process which opened the uid_map file does not have CAP_SETFCAP, which is the capability for setting file capabilities. As a further wrinkle: a task can unshare its user namespace, then open its uid_map file itself, and map (only) its own uid. In this case we do not have the credential from before unshare, which was potentially more restricted. So, when creating a user namespace, we record whether the creator had CAP_SETFCAP. Then we can use that during map_write(). With this patch: 1. Unprivileged user can still unshare -Ur ubuntu@caps:~$ unshare -Ur root@caps:~# logout 2. Root user can still unshare -Ur ubuntu@caps:~$ sudo bash root@caps:/home/ubuntu# unshare -Ur root@caps:/home/ubuntu# logout 3. Root user without CAP_SETFCAP cannot unshare -Ur: root@caps:/home/ubuntu# /sbin/capsh --drop=cap_setfcap -- root@caps:/home/ubuntu# /sbin/setcap cap_setfcap=p /sbin/setcap unable to set CAP_SETFCAP effective capability: Operation not permitted root@caps:/home/ubuntu# unshare -Ur unshare: write failed /proc/self/uid_map: Operation not permitted Note: an alternative solution would be to allow uid 0 mappings by processes without CAP_SETFCAP, but to prevent such a namespace from writing any file capabilities. This approach can be seen at [1]. Background history: commit 95ebabde382 ("capabilities: Don't allow writing ambiguous v3 file capabilities") tried to fix the issue by preventing v3 fscaps to be written to disk when the root uid would map to the same uid in nested user namespaces. This led to regressions for various workloads. For example, see [2]. Ultimately this is a valid use-case we have to support meaning we had to revert this change in 3b0c2d3eaa83 ("Revert 95ebabde382c ("capabilities: Don't allow writing ambiguous v3 file capabilities")"). Link: https://git.kernel.org/pub/scm/linux/kernel/git/sergeh/linux.git/log/?h=2021-04-15/setfcap-nsfscaps-v4 [1] Link: https://github.com/containers/buildah/issues/3071 [2] Signed-off-by: Serge Hallyn Reviewed-by: Andrew G. Morgan Tested-by: Christian Brauner Reviewed-by: Christian Brauner Tested-by: Giuseppe Scrivano Cc: Eric Biederman Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 3 ++ include/uapi/linux/capability.h | 3 +- kernel/user_namespace.c | 65 +++++++++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec..f6c5f784be5a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -63,6 +63,9 @@ struct user_namespace { kgid_t group; struct ns_common ns; unsigned long flags; + /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP + * in its effective capability set at the child ns creation time. */ + bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index c6ca33034147..2ddb4226cd23 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -335,7 +335,8 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_CONTROL 30 -/* Set or remove capabilities on files */ +/* Set or remove capabilities on files. + Map uid=0 into a child user namespace. */ #define CAP_SETFCAP 31 diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index af612945a4d0..9a4b980d695b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -106,6 +106,7 @@ int create_user_ns(struct cred *new) if (!ns) goto fail_dec; + ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP); ret = ns_alloc_inum(&ns->ns); if (ret) goto fail_free; @@ -841,6 +842,60 @@ static int sort_idmaps(struct uid_gid_map *map) return 0; } +/** + * verify_root_map() - check the uid 0 mapping + * @file: idmapping file + * @map_ns: user namespace of the target process + * @new_map: requested idmap + * + * If a process requests mapping parent uid 0 into the new ns, verify that the + * process writing the map had the CAP_SETFCAP capability as the target process + * will be able to write fscaps that are valid in ancestor user namespaces. + * + * Return: true if the mapping is allowed, false if not. + */ +static bool verify_root_map(const struct file *file, + struct user_namespace *map_ns, + struct uid_gid_map *new_map) +{ + int idx; + const struct user_namespace *file_ns = file->f_cred->user_ns; + struct uid_gid_extent *extent0 = NULL; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) + extent0 = &new_map->extent[idx]; + else + extent0 = &new_map->forward[idx]; + if (extent0->lower_first == 0) + break; + + extent0 = NULL; + } + + if (!extent0) + return true; + + if (map_ns == file_ns) { + /* The process unshared its ns and is writing to its own + * /proc/self/uid_map. User already has full capabilites in + * the new namespace. Verify that the parent had CAP_SETFCAP + * when it unshared. + * */ + if (!file_ns->parent_could_setfcap) + return false; + } else { + /* Process p1 is writing to uid_map of p2, who is in a child + * user namespace to p1's. Verify that the opener of the map + * file has CAP_SETFCAP against the parent of the new map + * namespace */ + if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP)) + return false; + } + + return true; +} + static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -848,7 +903,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct uid_gid_map *parent_map) { struct seq_file *seq = file->private_data; - struct user_namespace *ns = seq->private; + struct user_namespace *map_ns = seq->private; struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent extent; @@ -895,7 +950,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) + if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN)) goto out; /* Parse the user data */ @@ -965,7 +1020,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map)) goto out; ret = -EPERM; @@ -1086,6 +1141,10 @@ static bool new_idmap_permitted(const struct file *file, struct uid_gid_map *new_map) { const struct cred *cred = file->f_cred; + + if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map)) + return false; + /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ From 9d5171eab462a63e2fbebfccf6026e92be018f20 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 21 Apr 2021 15:42:47 -0700 Subject: [PATCH 4/6] KEYS: trusted: Fix TPM reservation for seal/unseal The original patch 8c657a0590de ("KEYS: trusted: Reserve TPM for seal and unseal operations") was correct on the mailing list: https://lore.kernel.org/linux-integrity/20210128235621.127925-4-jarkko@kernel.org/ But somehow got rebased so that the tpm_try_get_ops() in tpm2_seal_trusted() got lost. This causes an imbalanced put of the TPM ops and causes oopses on TIS based hardware. This fix puts back the lost tpm_try_get_ops() Fixes: 8c657a0590de ("KEYS: trusted: Reserve TPM for seal and unseal operations") Reported-by: Mimi Zohar Acked-by: Mimi Zohar Signed-off-by: James Bottomley --- security/keys/trusted-keys/trusted_tpm2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index e2a0ed5d02f0..c87c4df8703d 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -79,7 +79,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip, if (i == ARRAY_SIZE(tpm2_hash_map)) return -EINVAL; - rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE); + rc = tpm_try_get_ops(chip); if (rc) return rc; From a9d064524fc3cf463b3bb14fa63de78aafb40dab Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 12 Apr 2021 17:55:12 +0800 Subject: [PATCH 5/6] vhost-vdpa: protect concurrent access to vhost device iotlb Protect vhost device iotlb by vhost_dev->mutex. Otherwise, it might cause corruption of the list and interval tree in struct vhost_iotlb if userspace sends the VHOST_IOTLB_MSG_V2 message concurrently. Fixes: 4c8cf318("vhost: introduce vDPA-based backend") Cc: stable@vger.kernel.org Signed-off-by: Xie Yongji Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210412095512.178-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e0a27e336293..bfa4c6ef554e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -745,9 +745,11 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, const struct vdpa_config_ops *ops = vdpa->config; int r = 0; + mutex_lock(&dev->mutex); + r = vhost_dev_check_owner(dev); if (r) - return r; + goto unlock; switch (msg->type) { case VHOST_IOTLB_UPDATE: @@ -768,6 +770,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, r = -EINVAL; break; } +unlock: + mutex_unlock(&dev->mutex); return r; } From be286f84e33da1a7f83142b64dbd86f600e73363 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 11 Apr 2021 11:36:46 +0300 Subject: [PATCH 6/6] vdpa/mlx5: Set err = -ENOMEM in case dma_map_sg_attrs fails Set err = -ENOMEM if dma_map_sg_attrs() fails so the function reutrns error. Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Signed-off-by: Eli Cohen Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/20210411083646.910546-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vdpa/mlx5/core/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 3908ff28eec0..800cfd1967ad 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -278,8 +278,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr mr->log_size = log_entity_size; mr->nsg = nsg; mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); - if (!mr->nent) + if (!mr->nent) { + err = -ENOMEM; goto err_map; + } err = create_direct_mr(mvdev, mr); if (err)