From 49e065f5049f2b8d69f29ddc641498f46108dd12 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 21 Dec 2017 21:10:36 -0500 Subject: [PATCH 001/519] KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5020a8e6b54d2ece80b1e7dedb33c79a40ebd47 upstream. Syzbot reports crashes in kvm_irqfd_assign(), caused by use-after-free when kvm_irqfd_assign() and kvm_irqfd_deassign() run in parallel for one specific eventfd. When the assign path hasn't finished but irqfd has been added to kvm->irqfds.items list, another thead may deassign the eventfd and free struct kvm_kernel_irqfd(). The assign path then uses the struct kvm_kernel_irqfd that has been freed by deassign path. To avoid such issue, keep irqfd under kvm->irq_srcu protection after the irqfd has been added to kvm->irqfds.items list, and call synchronize_srcu() in irq_shutdown() to make sure that irqfd has been fully initialized in the assign path. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Signed-off-by: Tianyu Lan Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 49001fa84ead..1203829316b2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work) { struct kvm_kernel_irqfd *irqfd = container_of(work, struct kvm_kernel_irqfd, shutdown); + struct kvm *kvm = irqfd->kvm; u64 cnt; + /* Make sure irqfd has been initalized in assign path. */ + synchronize_srcu(&kvm->irq_srcu); + /* * Synchronize with the wait-queue and unhook ourselves to prevent * further events. @@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) idx = srcu_read_lock(&kvm->irq_srcu); irqfd_update(kvm, irqfd); - srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); @@ -419,6 +422,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->consumer.token, ret); #endif + srcu_read_unlock(&kvm->irq_srcu, idx); return 0; fail: From d0f4cd75aee1d5e55303de347f5a89431480ec7b Mon Sep 17 00:00:00 2001 From: Dewet Thibaut Date: Mon, 16 Jul 2018 10:49:27 +0200 Subject: [PATCH 002/519] x86/MCE: Remove min interval polling limitation commit fbdb328c6bae0a7c78d75734a738b66b86dffc96 upstream. commit b3b7c4795c ("x86/MCE: Serialize sysfs changes") introduced a min interval limitation when setting the check interval for polled MCEs. However, the logic is that 0 disables polling for corrected MCEs, see Documentation/x86/x86_64/machinecheck. The limitation prevents disabling. Remove this limitation and allow the value 0 to disable polling again. Fixes: b3b7c4795c ("x86/MCE: Serialize sysfs changes") Signed-off-by: Dewet Thibaut Signed-off-by: Alexander Sverdlin [ Massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180716084927.24869-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ddc9b8125918..7b8c8c838191 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2294,9 +2294,6 @@ static ssize_t store_int_with_restart(struct device *s, if (check_interval == old_check_interval) return ret; - if (check_interval < 1) - check_interval = 1; - mutex_lock(&mce_sysfs_mutex); mce_restart(); mutex_unlock(&mce_sysfs_mutex); From 0cb6eaf5e5be88ddfda51100adc7149d00a28bb2 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 20 Jul 2018 17:53:42 -0700 Subject: [PATCH 003/519] fat: fix memory allocation failure handling of match_strdup() commit 35033ab988c396ad7bce3b6d24060c16a9066db8 upstream. In parse_options(), if match_strdup() failed, parse_options() leaves opts->iocharset in unexpected state (i.e. still pointing the freed string). And this can be the cause of double free. To fix, this initialize opts->iocharset always when freeing. Link: http://lkml.kernel.org/r/8736wp9dzc.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: syzbot+90b8e10515ae88228a92@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index cf644d52c0cf..c81cfb79a339 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -613,13 +613,21 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -1034,7 +1042,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1184,8 +1192,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1776,8 +1783,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; From 01b6ca65e10f2669965fbc62440cb9b09a25d086 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Jul 2018 17:26:43 +0200 Subject: [PATCH 004/519] ALSA: rawmidi: Change resized buffers atomically commit 39675f7a7c7e7702f7d5341f1e0d01db746543a0 upstream. The SNDRV_RAWMIDI_IOCTL_PARAMS ioctl may resize the buffers and the current code is racy. For example, the sequencer client may write to buffer while it being resized. As a simple workaround, let's switch to the resized buffer inside the stream runtime lock. Reported-by: syzbot+52f83f0ea8df16932f7f@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 16f8124b1150..59111cadaec2 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card, int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { - char *newbuf; + char *newbuf, *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; if (substream->append && substream->use_count > 1) @@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, - GFP_KERNEL); + newbuf = kmalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; + spin_lock_irq(&runtime->lock); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; runtime->avail = runtime->buffer_size; + runtime->appl_ptr = runtime->hw_ptr = 0; + spin_unlock_irq(&runtime->lock); + kfree(oldbuf); } runtime->avail_min = params->avail_min; substream->active_sensing = !params->no_active_sensing; @@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params); int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { - char *newbuf; + char *newbuf, *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; snd_rawmidi_drain_input(substream); @@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, - GFP_KERNEL); + newbuf = kmalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; + spin_lock_irq(&runtime->lock); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; + runtime->appl_ptr = runtime->hw_ptr = 0; + spin_unlock_irq(&runtime->lock); + kfree(oldbuf); } runtime->avail_min = params->avail_min; return 0; From b3e0971a733e67d15aa114a4b42b4f4e67618b55 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 28 Jun 2018 16:59:14 -0700 Subject: [PATCH 005/519] ARC: Fix CONFIG_SWAP commit 6e3761145a9ba3ce267c330b6bff51cf6a057b06 upstream. swap was broken on ARC due to silly copy-paste issue. We encode offset from swapcache page in __swp_entry() as (off << 13) but were not decoding back in __swp_offset() as (off >> 13) - it was still (off << 13). This finally fixes swap usage on ARC. | # mkswap /dev/sda2 | | # swapon -a -e /dev/sda2 | Adding 500728k swap on /dev/sda2. Priority:-2 extents:1 across:500728k | | # free | total used free shared buffers cached | Mem: 765104 13456 751648 4736 8 4736 | -/+ buffers/cache: 8712 756392 | Swap: 500728 0 500728 Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e5fec320f158..c07d7b0a4058 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -372,7 +372,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* Decode a PTE containing swap "identifier "into constituents */ #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) -#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) /* NOPs, to keep generic kernel happy */ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) From cfebfe7a80e35b582fd8d7f6400f9a1d10003583 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Jul 2018 10:42:20 -0700 Subject: [PATCH 006/519] ARC: mm: allow mprotect to make stack mappings executable commit 93312b6da4df31e4102ce5420e6217135a16c7ea upstream. mprotect(EXEC) was failing for stack mappings as default vm flags was missing MAYEXEC. This was triggered by glibc test suite nptl/tst-execstack testcase What is surprising is that despite running LTP for years on, we didn't catch this issue as it lacks a directed test case. gcc dejagnu tests with nested functions also requiring exec stack work fine though because they rely on the GNU_STACK segment spit out by compiler and handled in kernel elf loader. This glibc case is different as the stack is non exec to begin with and a dlopen of shared lib with GNU_STACK segment triggers the exec stack proceedings using a mprotect(PROT_EXEC) which was broken. CC: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 429957f1c236..8f1145ed0046 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -102,7 +102,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE) +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define WANT_PAGE_VIRTUAL 1 From 08a0dc770c40f9d28c28d21c4728a329e489a57b Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Fri, 20 Jul 2018 17:53:48 -0700 Subject: [PATCH 007/519] mm: memcg: fix use after free in mem_cgroup_iter() commit 9f15bde671355c351cf20d9f879004b234353100 upstream. It was reported that a kernel crash happened in mem_cgroup_iter(), which can be triggered if the legacy cgroup-v1 non-hierarchical mode is used. Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b8f ...... Call trace: mem_cgroup_iter+0x2e0/0x6d4 shrink_zone+0x8c/0x324 balance_pgdat+0x450/0x640 kswapd+0x130/0x4b8 kthread+0xe8/0xfc ret_from_fork+0x10/0x20 mem_cgroup_iter(): ...... if (css_tryget(css)) <-- crash here break; ...... The crashing reason is that mem_cgroup_iter() uses the memcg object whose pointer is stored in iter->position, which has been freed before and filled with POISON_FREE(0x6b). And the root cause of the use-after-free issue is that invalidate_reclaim_iterators() fails to reset the value of iter->position to NULL when the css of the memcg is released in non- hierarchical mode. Link: http://lkml.kernel.org/r/1531994807-25639-1-git-send-email-jing.xia@unisoc.com Fixes: 6df38689e0e9 ("mm: memcontrol: fix possible memcg leak due to interrupted reclaim") Signed-off-by: Jing Xia Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 55a9facb8e8d..9a8e688724b1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -996,7 +996,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) int nid, zid; int i; - while ((memcg = parent_mem_cgroup(memcg))) { + for (; memcg; memcg = parent_mem_cgroup(memcg)) { for_each_node(nid) { for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; From 5a95ecebc7083533c8ac384146de991c29145aee Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 5 Jul 2018 18:49:23 +0000 Subject: [PATCH 008/519] ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns [ Upstream commit 70ba5b6db96ff7324b8cfc87e0d0383cf59c9677 ] The low and high values of the net.ipv4.ping_group_range sysctl were being silently forced to the default disabled state when a write to the sysctl contained GIDs that didn't map to the associated user namespace. Confusingly, the sysctl's write operation would return success and then a subsequent read of the sysctl would indicate that the low and high values are the overflowgid. This patch changes the behavior by clearly returning an error when the sysctl write operation receives a GID range that doesn't map to the associated user namespace. In such a situation, the previous value of the sysctl is preserved and that range will be returned in a subsequent read of the sysctl. Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 75abf978ef30..da90c74d12ef 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -141,8 +141,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, if (write && ret == 0) { low = make_kgid(user_ns, urange[0]); high = make_kgid(user_ns, urange[1]); - if (!gid_valid(low) || !gid_valid(high) || - (urange[1] < urange[0]) || gid_lt(high, low)) { + if (!gid_valid(low) || !gid_valid(high)) + return -EINVAL; + if (urange[1] < urange[0] || gid_lt(high, low)) { low = make_kgid(&init_user_ns, 1); high = make_kgid(&init_user_ns, 0); } From 2be7797acd1bb9ec30920b2aac29e474184ede4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Jul 2018 17:12:39 +0100 Subject: [PATCH 009/519] ipv6: fix useless rol32 call on hash [ Upstream commit 169dc027fb02492ea37a0575db6a658cf922b854 ] The rol32 call is currently rotating hash but the rol'd value is being discarded. I believe the current code is incorrect and hash should be assigned the rotated value returned from rol32. Thanks to David Lebrun for spotting this. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 84f0d0602433..0e01d570fa22 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -762,7 +762,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, * to minimize possbility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. */ - rol32(hash, 16); + hash = rol32(hash, 16); flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; From b67a684222441668cf326427d02c34a8dfedb6be Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jul 2018 13:26:13 -0700 Subject: [PATCH 010/519] lib/rhashtable: consider param->min_size when setting initial table size [ Upstream commit 107d01f5ba10f4162c38109496607eb197059064 ] rhashtable_init() currently does not take into account the user-passed min_size parameter unless param->nelem_hint is set as well. As such, the default size (number of buckets) will always be HASH_DEFAULT_SIZE even if the smallest allowed size is larger than that. Remediate this by unconditionally calling into rounded_hashtable_size() and handling things accordingly. Signed-off-by: Davidlohr Bueso Acked-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 51282f579760..37ea94b636a3 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -670,8 +670,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { - return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - (unsigned long)params->min_size); + size_t retsize; + + if (params->nelem_hint) + retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + (unsigned long)params->min_size); + else + retsize = max(HASH_DEFAULT_SIZE, + (unsigned long)params->min_size); + + return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) @@ -728,8 +736,6 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *tbl; size_t size; - size = HASH_DEFAULT_SIZE; - if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; @@ -756,8 +762,7 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); - if (params->nelem_hint) - size = rounded_hashtable_size(&ht->p); + size = rounded_hashtable_size(&ht->p); /* The maximum (not average) chain length grows with the * size of the hash table, at a rate of (log N)/(log log N). From be64f9f7a253184b733072ccd69a90350e86c46d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 7 Jul 2018 16:15:26 -0700 Subject: [PATCH 011/519] net/ipv4: Set oif in fib_compute_spec_dst [ Upstream commit e7372197e15856ec4ee66b668020a662994db103 ] Xin reported that icmp replies may not use the address on the device the echo request is received if the destination address is broadcast. Instead a route lookup is done without considering VRF context. Fix by setting oif in flow struct to the master device if it is enslaved. That directs the lookup to the VRF table. If the device is not enslaved, oif is still 0 so no affect. Fixes: cd2fbe1b6b51 ("net: Use VRF device index for lookups on RX") Reported-by: Xin Long Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c9e68ff48a72..8f05816a8be2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -297,6 +297,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, From 92b0c8dd9ea76cf215b1f740d06f8de430326289 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 3 Jul 2018 22:34:54 +0200 Subject: [PATCH 012/519] net: phy: fix flag masking in __set_phy_supported [ Upstream commit df8ed346d4a806a6eef2db5924285e839604b3f9 ] Currently also the pause flags are removed from phydev->supported because they're not included in PHY_DEFAULT_FEATURES. I don't think this is intended, especially when considering that this function can be called via phy_set_max_speed() anywhere in a driver. Change the masking to mask out only the values we're going to change. In addition remove the misleading comment, job of this small function is just to adjust the supported and advertised speeds. Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8179727d3423..1f2f25a71d18 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1265,11 +1265,8 @@ static int gen10g_resume(struct phy_device *phydev) static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - /* The default values for phydev->supported are provided by the PHY - * driver "features" member, we want to reset to sane defaults first - * before supporting higher speeds. - */ - phydev->supported &= PHY_DEFAULT_FEATURES; + phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | + PHY_10BT_FEATURES); switch (max_speed) { default: From 67aaf36e0da9b5008d6732520bcb4046f0cf8962 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Jul 2018 20:17:33 -0500 Subject: [PATCH 013/519] ptp: fix missing break in switch [ Upstream commit 9ba8376ce1e2cbf4ce44f7e4bee1d0648e10d594 ] It seems that a *break* is missing in order to avoid falling through to the default case. Otherwise, checking *chan* makes no sense. Fixes: 72df7a7244c0 ("ptp: Allow reassigning calibration pin function") Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index da7bae991552..d877ff124365 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -88,6 +88,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, case PTP_PF_PHYSYNC: if (chan != 0) return -EINVAL; + break; default: return -EINVAL; } From 8a82aee2d2f349afbfaee3754af7cf40c16c16a8 Mon Sep 17 00:00:00 2001 From: Sanjeev Bansal Date: Mon, 16 Jul 2018 11:13:32 +0530 Subject: [PATCH 014/519] tg3: Add higher cpu clock for 5762. [ Upstream commit 3a498606bb04af603a46ebde8296040b2de350d1 ] This patch has fix for TX timeout while running bi-directional traffic with 100 Mbps using 5762. Signed-off-by: Sanjeev Bansal Signed-off-by: Siva Reddy Kallam Reviewed-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1325825d5225..ce3a56bea6e6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -9278,6 +9278,15 @@ static int tg3_chip_reset(struct tg3 *tp) tg3_restore_clk(tp); + /* Increase the core clock speed to fix tx timeout issue for 5762 + * with 100Mbps link speed. + */ + if (tg3_asic_rev(tp) == ASIC_REV_5762) { + val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE); + tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val | + TG3_CPMU_MAC_ORIDE_ENABLE); + } + /* Reprobe ASF enable state. */ tg3_flag_clear(tp, ENABLE_ASF); tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK | From d629be850ac6e296dfe156604d7bb5202f1613da Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 11 Jul 2018 14:39:42 +0200 Subject: [PATCH 015/519] net: Don't copy pfmemalloc flag in __copy_skb_header() [ Upstream commit 8b7008620b8452728cadead460a36f64ed78c460 ] The pfmemalloc flag indicates that the skb was allocated from the PFMEMALLOC reserves, and the flag is currently copied on skb copy and clone. However, an skb copied from an skb flagged with pfmemalloc wasn't necessarily allocated from PFMEMALLOC reserves, and on the other hand an skb allocated that way might be copied from an skb that wasn't. So we should not copy the flag on skb copy, and rather decide whether to allow an skb to be associated with sockets unrelated to page reclaim depending only on how it was allocated. Move the pfmemalloc flag before headers_start[0] using an existing 1-bit hole, so that __copy_skb_header() doesn't copy it. When cloning, we'll now take care of this flag explicitly, contravening to the warning comment of __skb_clone(). While at it, restore the newline usage introduced by commit b19372273164 ("net: reorganize sk_buff for faster __copy_skb_header()") to visually separate bytes used in bitfields after headers_start[0], that was gone after commit a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area"), and describe the pfmemalloc flag in the kernel-doc structure comment. This doesn't change the size of sk_buff or cacheline boundaries, but consolidates the 15 bits hole before tc_index into a 2 bytes hole before csum, that could now be filled more easily. Reported-by: Patrick Talbert Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 12 ++++++------ net/core/skbuff.c | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a6da214d0584..c28bd8be290a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -514,6 +514,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue + * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -594,8 +595,8 @@ struct sk_buff { fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + pfmemalloc:1; kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied @@ -615,19 +616,18 @@ struct sk_buff { __u8 __pkt_type_offset[0]; __u8 pkt_type:3; - __u8 pfmemalloc:1; __u8 ignore_df:1; __u8 nfctinfo:3; - __u8 nf_trace:1; + __u8 ip_summed:2; __u8 ooo_okay:1; __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - __u8 no_fcs:1; + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; @@ -635,11 +635,11 @@ struct sk_buff { __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_bad:1; - #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif __u8 ipvs_property:1; + __u8 inner_protocol_type:1; __u8 remcsum_offload:1; /* 3 or 5 bit hole */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fa02c680eebc..60ad04039d2a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -828,6 +828,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; + if (skb->pfmemalloc) + n->pfmemalloc = 1; n->destructor = NULL; C(tail); C(end); From 80a80f51cc3aeea3d7d9d2b859357f611de7a87d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 13 Jul 2018 13:21:07 +0200 Subject: [PATCH 016/519] skbuff: Unconditionally copy pfmemalloc in __skb_clone() [ Upstream commit e78bfb0751d4e312699106ba7efbed2bab1a53ca ] Commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()") introduced a different handling for the pfmemalloc flag in copy and clone paths. In __skb_clone(), now, the flag is set only if it was set in the original skb, but not cleared if it wasn't. This is wrong and might lead to socket buffers being flagged with pfmemalloc even if the skb data wasn't allocated from pfmemalloc reserves. Copy the flag instead of ORing it. Reported-by: Sabrina Dubroca Fixes: 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()") Signed-off-by: Stefano Brivio Tested-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 60ad04039d2a..55be076706e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -828,8 +828,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; - if (skb->pfmemalloc) - n->pfmemalloc = 1; + C(pfmemalloc); n->destructor = NULL; C(tail); C(end); From fce27138ceeb47c2644d1bdabc36dfc4cf025e83 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 21 Jun 2018 16:19:41 +0300 Subject: [PATCH 017/519] xhci: Fix perceived dead host due to runtime suspend race with event handler commit 229bc19fd7aca4f37964af06e3583c1c8f36b5d6 upstream. Don't rely on event interrupt (EINT) bit alone to detect pending port change in resume. If no change event is detected the host may be suspended again, oterwise roothubs are resumed. There is a lag in xHC setting EINT. If we don't notice the pending change in resume, and the controller is runtime suspeded again, it causes the event handler to assume host is dead as it will fail to read xHC registers once PCI puts the controller to D3 state. [ 268.520969] xhci_hcd: xhci_resume: starting port polling. [ 268.520985] xhci_hcd: xhci_hub_status_data: stopping port polling. [ 268.521030] xhci_hcd: xhci_suspend: stopping port polling. [ 268.521040] xhci_hcd: // Setting command ring address to 0x349bd001 [ 268.521139] xhci_hcd: Port Status Change Event for port 3 [ 268.521149] xhci_hcd: resume root hub [ 268.521163] xhci_hcd: port resume event for port 3 [ 268.521168] xhci_hcd: xHC is not running. [ 268.521174] xhci_hcd: handle_port_status: starting port polling. [ 268.596322] xhci_hcd: xhci_hc_died: xHCI host controller not responding, assume dead The EINT lag is described in a additional note in xhci specs 4.19.2: "Due to internal xHC scheduling and system delays, there will be a lag between a change bit being set and the Port Status Change Event that it generated being written to the Event Ring. If SW reads the PORTSC and sees a change bit set, there is no guarantee that the corresponding Port Status Change Event has already been written into the Event Ring." Cc: Signed-off-by: Mathias Nyman Signed-off-by: Kai-Heng Feng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++++++++++++++++--- drivers/usb/host/xhci.h | 4 ++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f2e9f59c90d6..2d837b6bd495 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -887,6 +887,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_unlock_irqrestore(&xhci->lock, flags); } +static bool xhci_pending_portevent(struct xhci_hcd *xhci) +{ + __le32 __iomem **port_array; + int port_index; + u32 status; + u32 portsc; + + status = readl(&xhci->op_regs->status); + if (status & STS_EINT) + return true; + /* + * Checking STS_EINT is not enough as there is a lag between a change + * bit being set and the Port Status Change Event that it generated + * being written to the Event Ring. See note in xhci 1.1 section 4.19.2. + */ + + port_index = xhci->num_usb2_ports; + port_array = xhci->usb2_ports; + while (port_index--) { + portsc = readl(port_array[port_index]); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + port_index = xhci->num_usb3_ports; + port_array = xhci->usb3_ports; + while (port_index--) { + portsc = readl(port_array[port_index]); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + return false; +} + /* * Stop HC (not bus-specific) * @@ -983,7 +1018,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend); */ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) { - u32 command, temp = 0, status; + u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); struct usb_hcd *secondary_hcd; int retval = 0; @@ -1105,8 +1140,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { /* Resume root hubs only when have pending events. */ - status = readl(&xhci->op_regs->status); - if (status & STS_EINT) { + if (xhci_pending_portevent(xhci)) { usb_hcd_resume_root_hub(xhci->shared_hcd); usb_hcd_resume_root_hub(hcd); } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1715705acc59..84d8871755b7 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -382,6 +382,10 @@ struct xhci_op_regs { #define PORT_PLC (1 << 22) /* port configure error change - port failed to configure its link partner */ #define PORT_CEC (1 << 23) +#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ + PORT_RC | PORT_PLC | PORT_CEC) + + /* Cold Attach Status - xHC can set this bit to report device attached during * Sx state. Warm port reset should be perfomed to clear this bit and move port * to connected state. From f868639bf8896908ad45adf1e7c1f786bb3568cc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:24 -0700 Subject: [PATCH 018/519] x86/paravirt: Make native_save_fl() extern inline commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream. native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. [Backport for 4.4. 4.4 is missing commit 784d5699eddc "x86: move exports to actual definitions" which doesn't apply cleanly, and not really worth backporting IMO. It's simpler to change this patch from upstream: + #include rather than + #include ] Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h | 2 +- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/irqflags.S | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/irqflags.S diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index b77f5edb03b0..0056bc945cd1 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -8,7 +8,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1b78ffe01d0..7947cee61f61 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -41,6 +41,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S new file mode 100644 index 000000000000..3817eb748eb4 --- /dev/null +++ b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) From 8cee8b4cdd50c5f90f8c63b63bcfba6d1f3839b7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:29:15 -0700 Subject: [PATCH 019/519] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf (cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f) This is a pure feature bits leaf. There are two AVX512 feature bits in it already which were handled as scattered bits, and three more from this leaf are going to be added for speculation control features. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 6 +++++- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 1 + 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index dd0089841a0f..d72c1db64679 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -28,6 +28,7 @@ enum cpuid_leafs CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, + CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 205ce70c1d6c..da14ca894a15 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -12,7 +12,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -285,6 +285,10 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 21c5ac15657b..1f8cca459c6c 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -59,6 +59,7 @@ #define DISABLED_MASK15 0 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fac9a5c0abe9..6847d85400a8 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -100,6 +100,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 736e2843139b..ac7c52682d33 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -695,6 +695,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; + c->x86_capability[CPUID_7_EDX] = edx; } /* Extended state features: level 0x0000000d */ From 7169b43e7c68edd550efa812c295685947ffa8a0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:29:24 -0700 Subject: [PATCH 020/519] x86/cpufeatures: Add Intel feature bits for Speculation Control (cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61) Add three feature bits exposed by new microcode on Intel CPUs for speculation control. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index da14ca894a15..f50e8576eb3b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -288,6 +288,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* * BUG word(s) From b00f820b5143a2fc0a9c859a52be2ef2244834ba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:29:33 -0700 Subject: [PATCH 021/519] x86/cpufeatures: Add AMD feature bits for Speculation Control (cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7) AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: Tom Lendacky Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f50e8576eb3b..a5671b849837 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -251,6 +251,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ +#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ From 4fbcf1a84d8ad1bf15937fa6f9623045da153b4e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:29:43 -0700 Subject: [PATCH 022/519] x86/msr: Add definitions for new speculation control MSRs (cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410) Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b8911aecf035..f4701f0e613a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -32,6 +32,13 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd @@ -45,6 +52,11 @@ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e From 4ef0c99359c55ce60ba3859eb615c36c6cbc392c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:29:52 -0700 Subject: [PATCH 023/519] x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown (cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621) Also, for CPUs which don't speculate at all, don't report that they're vulnerable to the Spectre variants either. Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it for now, even though that could be done with a simple comparison, on the assumption that we'll have more to add. Based on suggestions from Dave Hansen and Alan Cox. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Acked-by: Dave Hansen Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ac7c52682d33..d6c097cdbefb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -794,6 +796,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initdata struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_CENTAUR, 5 }, + { X86_VENDOR_INTEL, 5 }, + { X86_VENDOR_NSC, 5 }, + { X86_VENDOR_ANY, 4 }, + {} +}; + +static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} +}; + +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = 0; + + if (x86_match_cpu(cpu_no_meltdown)) + return false; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return false; + + return true; +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -840,11 +877,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (c->x86_vendor != X86_VENDOR_AMD) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (!x86_match_cpu(cpu_no_speculation)) { + if (cpu_vulnerable_to_meltdown(c)) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + } fpu__init_system(c); From c64410cf4d3abd6c9f5abdd38db0a855926304c5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:30:01 -0700 Subject: [PATCH 024/519] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes (cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035) This doesn't refuse to load the affected microcodes; it just refuses to use the Spectre v2 mitigation features if they're detected, by clearing the appropriate feature bits. The AMD CPUID bits are handled here too, because hypervisors *may* have been exposing those bits even on Intel chips, for fine-grained control of what's available. It is non-trivial to use x86_match_cpu() for this table because that doesn't handle steppings. And the approach taken in commit bd9240a18 almost made me lose my lunch. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/intel-family.h | 5 ++- arch/x86/kernel/cpu/intel.c | 67 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 6999f7d01a0d..12fa187865c2 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -12,6 +12,7 @@ */ #define INTEL_FAM6_CORE_YONAH 0x0E + #define INTEL_FAM6_CORE2_MEROM 0x0F #define INTEL_FAM6_CORE2_MEROM_L 0x16 #define INTEL_FAM6_CORE2_PENRYN 0x17 @@ -20,6 +21,7 @@ #define INTEL_FAM6_NEHALEM 0x1E #define INTEL_FAM6_NEHALEM_EP 0x1A #define INTEL_FAM6_NEHALEM_EX 0x2E + #define INTEL_FAM6_WESTMERE 0x25 #define INTEL_FAM6_WESTMERE2 0x1F #define INTEL_FAM6_WESTMERE_EP 0x2C @@ -36,9 +38,9 @@ #define INTEL_FAM6_HASWELL_GT3E 0x46 #define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_BROADWELL_GT3E 0x47 #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E @@ -60,6 +62,7 @@ #define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A /* Xeon Phi */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9299e3bdfad6..23ba9cc0cc0d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -25,6 +26,59 @@ #include #endif +/* + * Early microcode releases for the Spectre v2 mitigation were broken. + * Information taken from; + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release + */ +struct sku_microcode { + u8 model; + u8 stepping; + u32 microcode; +}; +static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + /* Updated in the 20180108 release; blacklist until we know otherwise */ + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +}; + +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_mask == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -51,6 +105,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_STIBP) || + cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || + cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_STIBP); + clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); + clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + } + /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * From 5ff6b14190322e92489254dc4d10c28f203ee5fc Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:30:10 -0700 Subject: [PATCH 025/519] x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support (cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d) Expose indirect_branch_prediction_barrier() for use in subsequent patches. [ tglx: Add IBPB status to spectre_v2 sysfs file ] Co-developed-by: KarimAllah Ahmed Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ arch/x86/kernel/cpu/bugs.c | 10 +++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a5671b849837..b4e370b5b761 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -201,6 +201,8 @@ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ +#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8b910416243c..41851afd44af 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,19 @@ static inline void vmexit_fill_RSB(void) #endif } +static inline void indirect_branch_prediction_barrier(void) +{ + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2bbc74f8a4a8..7def33ada730 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -296,6 +296,13 @@ static void __init spectre_v2_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Filling RSB on context switch\n"); } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { + setup_force_cpu_cap(X86_FEATURE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } } #undef pr_fmt @@ -325,7 +332,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", spectre_v2_module_string()); } #endif From 9a016c16d87fef47ad24ce8a9f30e8fce030225e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:30:20 -0700 Subject: [PATCH 026/519] x86/cpufeatures: Clean up Spectre v2 related CPUID flags (cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2) We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", "ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them as the user-visible bits. When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP bit is set, set the AMD STIBP that's used for the generic hardware capability. Hide the rest from /proc/cpuinfo by putting "" in the comments. Including RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are patches to make the sysfs vulnerabilities information non-readable by non-root, and the same should apply to all information about which mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. The feature bit for whether IBPB is actually used, which is needed for ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. Originally-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 18 ++++++++-------- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 7 +++---- arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++--------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b4e370b5b761..782005d7eb48 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,14 +194,14 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ -#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -253,9 +253,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ -#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ -#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -293,8 +293,8 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 41851afd44af..8dcecb912365 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -201,7 +201,7 @@ static inline void indirect_branch_prediction_barrier(void) "movl %[val], %%eax\n\t" "movl $0, %%edx\n\t" "wrmsr", - X86_FEATURE_IBPB) + X86_FEATURE_USE_IBPB) : : [msr] "i" (MSR_IA32_PRED_CMD), [val] "i" (PRED_CMD_IBPB) : "eax", "ecx", "edx", "memory"); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7def33ada730..1968baf66c48 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -298,9 +298,8 @@ static void __init spectre_v2_select_mitigation(void) } /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || - boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { - setup_force_cpu_cap(X86_FEATURE_IBPB); + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Enabling Indirect Branch Prediction Barrier\n"); } } @@ -333,7 +332,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 23ba9cc0cc0d..fee94ee8efe7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,17 +105,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } - if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_STIBP) || - cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || - cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { - pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + + /* Now if any of them are set, check the blacklist and clear the lot */ + if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); + clear_cpu_cap(c, X86_FEATURE_IBRS); + clear_cpu_cap(c, X86_FEATURE_IBPB); clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); - clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); } /* From 769b27207746415f530615a0f4faca12c432bbc4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:30:29 -0700 Subject: [PATCH 027/519] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel (cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b) Despite the fact that all the other code there seems to be doing it, just using set_cpu_cap() in early_intel_init() doesn't actually work. For CPUs with PKU support, setup_pku() calls get_cpu_cap() after c->c_init() has set those feature bits. That resets those bits back to what was queried from the hardware. Turning the bits off for bad microcode is easy to fix. That can just use setup_clear_cpu_cap() to force them off for all CPUs. I was less keen on forcing the feature bits *on* that way, just in case of inconsistencies. I appreciate that the kernel is going to get this utterly wrong if CPU features are not consistent, because it has already applied alternatives by the time secondary CPUs are brought up. But at least if setup_force_cpu_cap() isn't being used, we might have a chance of *detecting* the lack of the corresponding bit and either panicking or refusing to bring the offending CPU online. So ensure that the appropriate feature bits are set within get_cpu_cap() regardless of how many extra times it's called. Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 27 ++++++++------------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d6c097cdbefb..72d7e5a3f5d4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -676,6 +676,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) } } +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -768,6 +788,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fee94ee8efe7..0f1318901777 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,28 +105,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } - /* - * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. - */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - /* Now if any of them are set, check the blacklist and clear the lot */ - if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); - clear_cpu_cap(c, X86_FEATURE_IBRS); - clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); } /* From 1aae84c2807e6ee1358725aee9eabf1137d055e6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 14 Jul 2018 02:30:37 -0700 Subject: [PATCH 028/519] x86/pti: Mark constant arrays as __initconst (cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e) I'm seeing build failures from the two newly introduced arrays that are marked 'const' and '__initdata', which are mutually exclusive: arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init' arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init' The correct annotation is __initconst. Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Ricardo Neri Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Thomas Garnier Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 72d7e5a3f5d4..48499b41351c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -817,7 +817,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initdata struct x86_cpu_id cpu_no_speculation[] = { +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, @@ -830,7 +830,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { {} }; -static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { { X86_VENDOR_AMD }, {} }; From 659cc61a987662a6674022d5980a5b5eb0a9b4fe Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 14 Jul 2018 02:30:46 -0700 Subject: [PATCH 029/519] x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs commit 778843f934e362ed4ed734520f60a44a78a074b4 upstream Use of a temporary R8 register here seems to be unnecessary. "push %r8" is a two-byte insn (it needs REX prefix to specify R8), "push $0" is two-byte too. It seems just using the latter would be no worse. Thus, code had an unnecessary "xorq %r8,%r8" insn. It probably costs nothing in execution time here since we are probably limited by store bandwidth at this point, but still. Run-tested under QEMU: 32-bit calls still work: / # ./test_syscall_vdso32 [RUN] Executing 6-argument 32-bit syscall via VDSO [OK] Arguments are preserved across syscall [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn [OK] R8..R15 did not leak kernel data [RUN] Executing 6-argument 32-bit syscall via INT 80 [OK] Arguments are preserved across syscall [OK] R8..R15 did not leak kernel data [RUN] Running tests under ptrace [RUN] Executing 6-argument 32-bit syscall via VDSO [OK] Arguments are preserved across syscall [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn [OK] R8..R15 did not leak kernel data [RUN] Executing 6-argument 32-bit syscall via INT 80 [OK] Arguments are preserved across syscall [OK] R8..R15 did not leak kernel data Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1462201010-16846-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64_compat.S | 45 +++++++++++++++----------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d03bf0e28b8b..e479ff847023 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -79,24 +79,23 @@ ENTRY(entry_SYSENTER_compat) ASM_CLAC /* Clear AC after saving FLAGS */ pushq $__USER32_CS /* pt_regs->cs */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->ip = 0 (placeholder) */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ cld /* @@ -185,17 +184,16 @@ ENTRY(entry_SYSCALL_compat) pushq %rdx /* pt_regs->dx */ pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -292,11 +290,10 @@ ENTRY(entry_INT80_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp */ pushq %r12 /* pt_regs->r12 */ From 5c91dde1312e8d9cc2b7e6a60cdc22debc711c8e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 14 Jul 2018 02:30:55 -0700 Subject: [PATCH 030/519] x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface commit 6b8cf5cc9965673951f1ab3f0e3cf23d06e3e2ee upstream. At entry userspace may have populated registers with values that could otherwise be useful in a speculative execution attack. Clear them to minimize the kernel's attack surface. Originally-From: Andi Kleen Signed-off-by: Dan Williams Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151787989697.7847.4083702787288600552.stgit@dwillia2-desk3.amr.corp.intel.com [ Made small improvements to the changelog. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64_compat.S | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e479ff847023..48c27c3fdfdb 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -87,15 +87,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorq %r12, %r12 /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorq %r13, %r13 /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorq %r14, %r14 /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorq %r15, %r15 /* nospec r15 */ cld /* @@ -185,15 +195,25 @@ ENTRY(entry_SYSCALL_compat) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorq %r12, %r12 /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorq %r13, %r13 /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorq %r14, %r14 /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorq %r15, %r15 /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -291,15 +311,25 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15 */ cld /* From b7c492fb9e33857cf983c7807929f1410655765c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:31:04 -0700 Subject: [PATCH 031/519] x86/speculation: Update Speculation Control microcode blacklist commit 1751342095f0d2b36fa8114d8e12c5688c455ac4 upstream. Intel have retroactively blessed the 0xc2 microcode on Skylake mobile and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine too. We blacklisted the latter purely because it was present with all the other problematic ones in the 2018-01-08 release, but now it's explicitly listed as OK. We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as that appeared in one version of the blacklist and then reverted to 0x80 again. We can change it if 0x84 is actually announced to be safe. Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0f1318901777..71492d29456f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -47,8 +47,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, @@ -60,8 +58,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, - /* Updated in the 20180108 release; blacklist until we know otherwise */ - { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, /* Observed in the wild */ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, From 307261be84cca663b9497a68c2fbc8bc1061f494 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:31:13 -0700 Subject: [PATCH 032/519] x86/speculation: Correct Speculation Control microcode blacklist again commit d37fc6d360a404b208547ba112e7dabb6533c7fc upstream. Arjan points out that the Intel document only clears the 0xc2 microcode on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3). For the Skylake H/S platform it's OK but for Skylake E3 which has the same CPUID it isn't (yet) cleared. So removing it from the blacklist was premature. Put it back for now. Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was featured in one of the early revisions of the Intel document was never released to the public, and won't be until/unless it is also validated as safe. So those can change to 0x80 which is what all *other* versions of the doc have identified. Once the retrospective testing of existing public microcodes is done, we should be back into a mode where new microcodes are only released in batches and we shouldn't even need to update the blacklist for those anyway, so this tweaking of the list isn't expected to be a thing which keeps happening. Requested-by: Arjan van de Ven Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 71492d29456f..b69d258f9aae 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -40,13 +40,14 @@ struct sku_microcode { u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, From d10b55dd5a1612a76e58bb80c7b0ec92672c0e5b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jul 2018 02:31:23 -0700 Subject: [PATCH 033/519] x86/speculation: Clean up various Spectre related details commit 21e433bdb95bdf3aa48226fd3d33af608437f293 upstream. Harmonize all the Spectre messages so that a: dmesg | grep -i spectre ... gives us most Spectre related kernel boot messages. Also fix a few other details: - clarify a comment about firmware speculation control - s/KPTI/PTI - remove various line-breaks that made the code uglier Acked-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1968baf66c48..fea368ddcd85 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); if (ret < 0) return SPECTRE_V2_CMD_AUTO; @@ -184,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -255,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void) goto retpoline_auto; break; } - pr_err("kernel not compiled with retpoline; no mitigation available!"); + pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); return; retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { retpoline_amd: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : @@ -280,7 +278,7 @@ static void __init spectre_v2_select_mitigation(void) pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP or KPTI are available, there is a risk of + * If neither SMEP nor PTI are available, there is a risk of * hitting userspace addresses in the RSB after a context switch * from a shallow call stack to a deeper one. To prevent this fill * the entire RSB, even when using IBRS. @@ -294,21 +292,20 @@ static void __init spectre_v2_select_mitigation(void) if ((!boot_cpu_has(X86_FEATURE_KAISER) && !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Filling RSB on context switch\n"); + pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); } /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Enabling Indirect Branch Prediction Barrier\n"); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } } #undef pr_fmt #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return sprintf(buf, "Not affected\n"); @@ -317,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev, return sprintf(buf, "Vulnerable\n"); } -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); From 1af5c9661555bff49d50d38b5723a11ad85fbc97 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 14 Jul 2018 02:31:32 -0700 Subject: [PATCH 034/519] x86/speculation: Fix up array_index_nospec_mask() asm constraint commit be3233fbfcb8f5acb6e3bcd0895c3ef9e100d470 upstream. Allow the compiler to handle @size as an immediate value or memory directly rather than allocating a register. Reported-by: Linus Torvalds Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151797010204.1289.1510000292250184993.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e3a6f66d288c..7f5dcb64cedb 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, asm volatile ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) - :"r"(size),"r" (index) + :"g"(size),"r" (index) :"cc"); return mask; } From 4b9593083546b76299b28f0abb76505b4988860f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 14 Jul 2018 02:31:40 -0700 Subject: [PATCH 035/519] x86/speculation: Add dependency commit ea00f301285ea2f07393678cd2b6057878320c9d upstream. Joe Konno reported a compile failure resulting from using an MSR without inclusion of , and while the current code builds fine (by accident) this needs fixing for future patches. Reported-by: Joe Konno Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan@linux.intel.com Cc: bp@alien8.de Cc: dan.j.williams@intel.com Cc: dave.hansen@linux.intel.com Cc: dwmw2@infradead.org Cc: dwmw@amazon.co.uk Cc: gregkh@linuxfoundation.org Cc: hpa@zytor.com Cc: jpoimboe@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: luto@kernel.org Fixes: 20ffa1caecca ("x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support") Link: http://lkml.kernel.org/r/20180213132819.GJ25201@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8dcecb912365..bca286042e4b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * Fill the CPU return stack buffer. From 7c55236675b8426f861a6e63b75ec1e17057c8a0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sat, 14 Jul 2018 02:31:49 -0700 Subject: [PATCH 036/519] x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend commit 71c208dd54ab971036d83ff6d9837bae4976e623 upstream. Older Xen versions (4.5 and before) might have problems migrating pv guests with MSR_IA32_SPEC_CTRL having a non-zero value. So before suspending zero that MSR and restore it after being resumed. Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Reviewed-by: Jan Beulich Cc: stable@vger.kernel.org Cc: xen-devel@lists.xenproject.org Cc: boris.ostrovsky@oracle.com Link: https://lkml.kernel.org/r/20180226140818.4849-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/suspend.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 7f664c416faf..4ecd0de08557 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,11 +1,14 @@ #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -68,6 +71,8 @@ static void xen_pv_post_suspend(int suspend_cancelled) xen_mm_unpin_all(); } +static DEFINE_PER_CPU(u64, spec_ctrl); + void xen_arch_pre_suspend(void) { if (xen_pv_domain()) @@ -84,6 +89,9 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); + /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) return; @@ -93,7 +101,15 @@ static void xen_vcpu_notify_restore(void *data) static void xen_vcpu_notify_suspend(void *data) { + u64 tmp; + tick_suspend_local(); + + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { + rdmsrl(MSR_IA32_SPEC_CTRL, tmp); + this_cpu_write(spec_ctrl, tmp); + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + } } void xen_arch_resume(void) From 58ac8c59dbb3a8e8b6414524c2d8f4f0a7bbeaa4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 14 Jul 2018 02:31:57 -0700 Subject: [PATCH 037/519] x86/mm: Factor out LDT init from context init commit 39a0526fb3f7d93433d146304278477eb463f8af upstream The arch-specific mm_context_t is a great place to put protection-key allocation state. But, we need to initialize the allocation state because pkey 0 is always "allocated". All of the runtime initialization of mm_context_t is done in *_ldt() manipulation functions. This renames the existing LDT functions like this: init_new_context() -> init_new_context_ldt() destroy_context() -> destroy_context_ldt() and makes init_new_context() and destroy_context() available for generic use. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210234.DB34FCC5@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 21 ++++++++++++++++----- arch/x86/kernel/ldt.c | 4 ++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 9bfc5fd77015..1c4794f861d7 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -52,15 +52,15 @@ struct ldt_struct { /* * Used for LDT copy/destruction. */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); +int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context_ldt(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context(struct task_struct *tsk, - struct mm_struct *mm) +static inline int init_new_context_ldt(struct task_struct *tsk, + struct mm_struct *mm) { return 0; } -static inline void destroy_context(struct mm_struct *mm) {} +static inline void destroy_context_ldt(struct mm_struct *mm) {} #endif static inline void load_mm_ldt(struct mm_struct *mm) @@ -102,6 +102,17 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); } +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + init_new_context_ldt(tsk, mm); + return 0; +} +static inline void destroy_context(struct mm_struct *mm) +{ + destroy_context_ldt(mm); +} + extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index bc429365b72a..8bc68cfc0d33 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -119,7 +119,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) * we do not have to muck with descriptors here, that is * done in switch_mm() as needed. */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) { struct ldt_struct *new_ldt; struct mm_struct *old_mm; @@ -160,7 +160,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * * 64bit: Don't touch the LDT register - we're already in the next thread. */ -void destroy_context(struct mm_struct *mm) +void destroy_context_ldt(struct mm_struct *mm) { free_ldt_struct(mm->context.ldt); mm->context.ldt = NULL; From 937dad078f557ddd6151e62f7f0028c136ffba4a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Jul 2018 02:32:07 -0700 Subject: [PATCH 038/519] x86/mm: Give each mm TLB flush generation a unique ID commit f39681ed0f48498b80455095376f11535feea332 upstream. This adds two new variables to mmu_context_t: ctx_id and tlb_gen. ctx_id uniquely identifies the mm_struct and will never be reused. For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic count of the number of times that a TLB flush has been requested. The pair (ctx_id, tlb_gen) can be used as an identifier for TLB flush actions and will be used in subsequent patches to reliably determine whether all needed TLB flushes have occurred on a given CPU. This patch is split out for ease of review. By itself, it has no real effect other than creating and updating the new variables. Signed-off-by: Andy Lutomirski Reviewed-by: Nadav Amit Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Tim Chen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu.h | 15 +++++++++++++-- arch/x86/include/asm/mmu_context.h | 4 ++++ arch/x86/mm/tlb.c | 2 ++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 7680b76adafc..3359dfedc7ee 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -3,12 +3,18 @@ #include #include +#include /* - * The x86 doesn't have a mmu context, but - * we put the segment information here. + * x86 has arch-specific MMU state beyond what lives in mm_struct. */ typedef struct { + /* + * ctx_id uniquely identifies this mm_struct. A ctx_id will never + * be reused, and zero is not a valid ctx_id. + */ + u64 ctx_id; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; #endif @@ -24,6 +30,11 @@ typedef struct { atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ } mm_context_t; +#define INIT_MM_CONTEXT(mm) \ + .context = { \ + .ctx_id = 1, \ + } + void leave_mm(int cpu); #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 1c4794f861d7..effc12767cbf 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -11,6 +11,9 @@ #include #include #include + +extern atomic64_t last_mm_ctx_id; + #ifndef CONFIG_PARAVIRT static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) @@ -105,6 +108,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); init_new_context_ldt(tsk, mm); return 0; } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7cad01af6dcd..efec198d271a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -29,6 +29,8 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); + struct flush_tlb_info { struct mm_struct *flush_mm; unsigned long flush_start; From 2997b0617b252f6e8630c1aa410697e2b0ed3b0d Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sat, 14 Jul 2018 02:32:16 -0700 Subject: [PATCH 039/519] x86/speculation: Use Indirect Branch Prediction Barrier in context switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18bf3c3ea8ece8f03b6fc58508f2dfd23c7711c7 upstream. Flush indirect branches when switching into a process that marked itself non dumpable. This protects high value processes like gpg better, without having too high performance overhead. If done naïvely, we could switch to a kernel idle thread and then back to the original process, such as: process A -> idle -> process A In such scenario, we do not have to do IBPB here even though the process is non-dumpable, as we are switching back to the same process after a hiatus. To avoid the redundant IBPB, which is expensive, we track the last mm user context ID. The cost is to have an extra u64 mm context id to track the last mm we were using before switching to the init_mm used by idle. Avoiding the extra IBPB is probably worth the extra memory for this common scenario. For those cases where tlb_defer_switch_to_init_mm() returns true (non PCID), lazy tlb will defer switch to init_mm, so we will not be changing the mm for the process A -> idle -> process A switch. So IBPB will be skipped for this case. Thanks to the reviewers and Andy Lutomirski for the suggestion of using ctx_id which got rid of the problem of mm pointer recycling. Signed-off-by: Tim Chen Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: linux@dominikbrodowski.net Cc: peterz@infradead.org Cc: bp@alien8.de Cc: luto@kernel.org Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/1517263487-3708-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 2 ++ arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e2a89d2577fb..8ce07db77299 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void) struct tlb_state { struct mm_struct *active_mm; int state; + /* last user mm's ctx id */ + u64 last_ctx_id; /* * Access to this CR4 shadow and to H/W CR4 is protected by diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index efec198d271a..6d683bbb3502 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -106,6 +107,36 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, unsigned cpu = smp_processor_id(); if (likely(prev != next)) { + u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); + + /* + * Avoid user/user BTB poisoning by flushing the branch + * predictor when switching between processes. This stops + * one process from doing Spectre-v2 attacks on another. + * + * As an optimization, flush indirect branches only when + * switching into processes that disable dumping. This + * protects high value processes like gpg, without having + * too high performance overhead. IBPB is *expensive*! + * + * This will not flush branches when switching into kernel + * threads. It will also not flush if we switch to idle + * thread and back to the same process. It will flush if we + * switch to a different non-dumpable process. + */ + if (tsk && tsk->mm && + tsk->mm->context.ctx_id != last_ctx_id && + get_dumpable(tsk->mm) != SUID_DUMP_USER) + indirect_branch_prediction_barrier(); + + /* + * Record last user mm's context id, so we can avoid + * flushing branch buffer with IBPB if we switch back + * to the same user. + */ + if (next != &init_mm) + this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); cpumask_set_cpu(cpu, mm_cpumask(next)); From 56c4a02fe0fc9fbdeee19eaef11c5b67c8bef371 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:32:25 -0700 Subject: [PATCH 040/519] x86/spectre_v2: Don't check microcode versions when running under hypervisors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 36268223c1e9981d6cfc33aff8520b3bde4b8114 upstream. As: 1) It's known that hypervisors lie about the environment anyhow (host mismatch) 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid "correct" value, it all gets to be very murky when migration happens (do you provide the "new" microcode of the machine?). And in reality the cloud vendors are the ones that should make sure that the microcode that is running is correct and we should just sing lalalala and trust them. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Cc: Wanpeng Li Cc: kvm Cc: Krčmář Cc: Borislav Petkov CC: "H. Peter Anvin" CC: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b69d258f9aae..dcc03498cf10 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -68,6 +68,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; + /* + * We know that the hypervisor lie to us on the microcode version so + * we may as well hope that it is running the correct version. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_mask == spectre_bad_microcodes[i].stepping) From 7ec391255421d5d311c66d6fbfb33cdfca789b9f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:32:33 -0700 Subject: [PATCH 041/519] x86/speculation: Use IBRS if available before calling into firmware commit dd84441a797150dcc49298ec95c459a8891d8bb1 upstream. Retpoline means the kernel is safe because it has no indirect branches. But firmware isn't, so use IBRS for firmware calls if it's available. Block preemption while IBRS is set, although in practice the call sites already had to be doing that. Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware code, from an NMI handler. I don't want to touch that with a bargepole. Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman [ Srivatsa: Backported to 4.4.y, patching the efi_call_virt() family of functions, which are the 4.4.y-equivalents of arch_efi_call_virt_setup()/teardown() ] Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apm.h | 6 +++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/efi.h | 7 +++++ arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++------- arch/x86/kernel/cpu/bugs.c | 12 ++++++++- arch/x86/platform/efi/efi_64.c | 3 +++ 6 files changed, 58 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6db74b..3d1ec41ae09a 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -6,6 +6,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -55,6 +59,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -67,6 +72,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 782005d7eb48..bc76bf39bb2f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -202,6 +202,7 @@ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78c4998..7e5a2ffb6938 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -39,8 +40,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); ({ \ efi_status_t __s; \ kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ __s = ((efi_##f##_t __attribute__((regparm(0)))*) \ efi.systab->runtime->f)(args); \ + firmware_restrict_branch_speculation_end(); \ kernel_fpu_end(); \ __s; \ }) @@ -49,8 +52,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define __efi_call_virt(f, args...) \ ({ \ kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ ((efi_##f##_t __attribute__((regparm(0)))*) \ efi.systab->runtime->f)(args); \ + firmware_restrict_branch_speculation_end(); \ kernel_fpu_end(); \ }) @@ -71,7 +76,9 @@ extern u64 asmlinkage efi_call(void *fp, ...); efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index bca286042e4b..36ded24ca381 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -195,17 +195,38 @@ static inline void vmexit_fill_RSB(void) #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); +} + +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + */ +static inline void firmware_restrict_branch_speculation_start(void) +{ + preempt_disable(); + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, + X86_FEATURE_USE_IBRS_FW); +} + +static inline void firmware_restrict_branch_speculation_end(void) +{ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, + X86_FEATURE_USE_IBRS_FW); + preempt_enable(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fea368ddcd85..b294fdc0faf2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } } #undef pr_fmt @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9c307f..f5a8cd96bae4 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -40,6 +40,7 @@ #include #include #include +#include /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -347,6 +348,7 @@ extern efi_status_t efi64_thunk(u32, ...); \ efi_sync_low_kernel_mappings(); \ local_irq_save(flags); \ + firmware_restrict_branch_speculation_start(); \ \ efi_scratch.prev_cr3 = read_cr3(); \ write_cr3((unsigned long)efi_scratch.efi_pgt); \ @@ -357,6 +359,7 @@ extern efi_status_t efi64_thunk(u32, ...); \ write_cr3(efi_scratch.prev_cr3); \ __flush_tlb_all(); \ + firmware_restrict_branch_speculation_end(); \ local_irq_restore(flags); \ \ __s; \ From bdf186811576fdec0a42b554b884ed8ae2df54a2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jul 2018 02:32:43 -0700 Subject: [PATCH 042/519] x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP commit d72f4e29e6d84b7ec02ae93088aa459ac70e733b upstream. firmware_restrict_branch_speculation_*() recently started using preempt_enable()/disable(), but those are relatively high level primitives and cause build failures on some 32-bit builds. Since we want to keep low level, convert them to macros to avoid header hell... Cc: David Woodhouse Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 36ded24ca381..b9dd1d9ef8af 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -214,20 +214,22 @@ static inline void indirect_branch_prediction_barrier(void) /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) */ -static inline void firmware_restrict_branch_speculation_start(void) -{ - preempt_disable(); - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, - X86_FEATURE_USE_IBRS_FW); -} +#define firmware_restrict_branch_speculation_start() \ +do { \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ +} while (0) -static inline void firmware_restrict_branch_speculation_end(void) -{ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, - X86_FEATURE_USE_IBRS_FW); - preempt_enable(); -} +#define firmware_restrict_branch_speculation_end() \ +do { \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ +} while (0) #endif /* __ASSEMBLY__ */ From 0990b1ff53bec71e4fdce971b6c4c5e1b7b91d01 Mon Sep 17 00:00:00 2001 From: Alexander Sergeyev Date: Sat, 14 Jul 2018 02:32:52 -0700 Subject: [PATCH 043/519] x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist commit e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 upstream. In accordance with Intel's microcode revision guidance from March 6 MCU rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors that share CPUID 506E3. Signed-off-by: Alexander Sergeyev Signed-off-by: Thomas Gleixner Cc: Jia Zhang Cc: Greg Kroah-Hartman Cc: Kyle Huey Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dcc03498cf10..77d9f6809b05 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,7 +29,7 @@ /* * Early microcode releases for the Spectre v2 mitigation were broken. * Information taken from; - * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf * - https://kb.vmware.com/s/article/52345 * - Microcode revisions observed in the wild * - Release note from 20180108 microcode release @@ -47,7 +47,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, From b4553a4ec2f64b818193ba85434b1f42c68d5aff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 14 Jul 2018 02:33:00 -0700 Subject: [PATCH 044/519] selftest/seccomp: Fix the flag name SECCOMP_FILTER_FLAG_TSYNC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c045d07bb305c527140bdec4cf8ab50f7c980d8 upstream Rename SECCOMP_FLAG_FILTER_TSYNC to SECCOMP_FILTER_FLAG_TSYNC to match the UAPI. Signed-off-by: Mickaël Salaün Cc: Andy Lutomirski Cc: Kees Cook Cc: Shuah Khan Cc: Will Drewry Acked-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 882fe83a3554..d446346d9146 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1476,8 +1476,8 @@ TEST_F(TRACE_syscall, syscall_dropped) #define SECCOMP_SET_MODE_FILTER 1 #endif -#ifndef SECCOMP_FLAG_FILTER_TSYNC -#define SECCOMP_FLAG_FILTER_TSYNC 1 +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 #endif #ifndef seccomp @@ -1592,7 +1592,7 @@ TEST(TSYNC_first) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); @@ -1810,7 +1810,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor) self->sibling_count++; } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(0, ret) { TH_LOG("Could install filter on all threads!"); @@ -1871,7 +1871,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); @@ -1919,7 +1919,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) self->sibling_count++; } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(self->sibling[0].system_tid, ret) { TH_LOG("Did not fail on diverged sibling."); @@ -1971,7 +1971,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(ret, self->sibling[0].system_tid) { TH_LOG("Did not fail on diverged sibling."); @@ -2000,7 +2000,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) /* Switch to the remaining sibling */ sib = !sib; - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(0, ret) { TH_LOG("Expected the remaining sibling to sync"); @@ -2023,7 +2023,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) while (!kill(self->sibling[sib].system_tid, 0)) sleep(0.1); - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(0, ret); /* just us chickens */ } From 9f62897343da5fbbbd0db7441750ecc4c833a6ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 14 Jul 2018 02:33:08 -0700 Subject: [PATCH 045/519] selftest/seccomp: Fix the seccomp(2) signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 505ce68c6da3432454c62e43c24a22ea5b1d754b upstream Signed-off-by: Mickaël Salaün Cc: Andy Lutomirski Cc: Kees Cook Cc: Shuah Khan Cc: Will Drewry Acked-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index d446346d9146..29487e0437ad 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1481,10 +1481,10 @@ TEST_F(TRACE_syscall, syscall_dropped) #endif #ifndef seccomp -int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) +int seccomp(unsigned int op, unsigned int flags, void *args) { errno = 0; - return syscall(__NR_seccomp, op, flags, filter); + return syscall(__NR_seccomp, op, flags, args); } #endif From 237a1870da36fcd10f67503928c485d964726d83 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sat, 14 Jul 2018 02:33:16 -0700 Subject: [PATCH 046/519] xen: set cpu capabilities from xen_start_kernel() Upstream commit: 0808e80cb760de2733c0527d2090ed2205a1eef8 ("xen: set cpu capabilities from xen_start_kernel()") There is no need to set the same capabilities for each cpu individually. This can easily be done for all cpus when starting the kernel. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cbef64b508e1..2d7ab4e23e9e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -460,6 +460,14 @@ static void __init xen_init_cpuid_mask(void) cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } +static void __init xen_init_capabilities(void) +{ + if (xen_pv_domain()) { + setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); + setup_force_cpu_cap(X86_FEATURE_XENPV); + } +} + static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -1587,6 +1595,7 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_init_irq_ops(); xen_init_cpuid_mask(); + xen_init_capabilities(); #ifdef CONFIG_X86_LOCAL_APIC /* @@ -1883,14 +1892,6 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -static void xen_set_cpu_features(struct cpuinfo_x86 *c) -{ - if (xen_pv_domain()) { - clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - set_cpu_cap(c, X86_FEATURE_XENPV); - } -} - const struct hypervisor_x86 x86_hyper_xen = { .name = "Xen", .detect = xen_platform, @@ -1898,7 +1899,6 @@ const struct hypervisor_x86 x86_hyper_xen = { .init_platform = xen_hvm_guest_init, #endif .x2apic_available = xen_x2apic_para_available, - .set_cpu_features = xen_set_cpu_features, }; EXPORT_SYMBOL(x86_hyper_xen); From e57a81c43ca5ea32bf28c4634e903e7a4a1cbbd3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:33:24 -0700 Subject: [PATCH 047/519] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen commit def9331a12977770cc6132d79f8e6565871e8e38 upstream When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set on AMD cpus. This bug/feature bit is kind of special as it will be used very early when switching threads. Setting the bit and clearing it a little bit later leaves a critical window where things can go wrong. This time window has enlarged a little bit by using setup_clear_cpu_cap() instead of the hypervisor's set_cpu_features callback. It seems this larger window now makes it rather easy to hit the problem. The proper solution is to never set the bit in case of Xen. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Thomas Gleixner Signed-off-by: Juergen Gross Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 5 +++-- arch/x86/xen/enlighten.c | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4fb8f5b0be4..9b2941422b68 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -791,8 +791,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - /* AMD CPUs don't reset SS attributes on SYSRET */ - set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ + if (!cpu_has(c, X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2d7ab4e23e9e..82fd84d5e1aa 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -462,10 +462,8 @@ static void __init xen_init_cpuid_mask(void) static void __init xen_init_capabilities(void) { - if (xen_pv_domain()) { - setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); + if (xen_pv_domain()) setup_force_cpu_cap(X86_FEATURE_XENPV); - } } static void xen_set_debugreg(int reg, unsigned long val) From b2dab2dc776cea8e1f190523456b32b850506ce3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 14 Jul 2018 02:33:32 -0700 Subject: [PATCH 048/519] x86/nospec: Simplify alternative_msr_write() commit 1aa7a5735a41418d8e01fa7c9565eb2657e2ea3f upstream The macro is not type safe and I did look for why that "g" constraint for the asm doesn't work: it's because the asm is more fundamentally wrong. It does movl %[val], %%eax but "val" isn't a 32-bit value, so then gcc will pass it in a register, and generate code like movl %rsi, %eax and gas will complain about a nonsensical 'mov' instruction (it's moving a 64-bit register to a 32-bit one). Passing it through memory will just hide the real bug - gcc still thinks the memory location is 64-bit, but the "movl" will only load the first 32 bits and it all happens to work because x86 is little-endian. Convert it to a type safe inline function with a little trick which hands the feature into the ALTERNATIVE macro. Signed-off-by: Linus Torvalds Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b9dd1d9ef8af..6403016d3445 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -195,15 +195,16 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" (val), + "d" (val >> 32), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { From d77421663170a2d660fa63a50c664805d132e69d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:33:40 -0700 Subject: [PATCH 049/519] x86/bugs: Concentrate bug detection into a separate function commit 4a28bfe3267b68e22c663ac26185aa16c9b879ef upstream Combine the various logic which goes through all those x86_cpu_id matching structures in one function. Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 48499b41351c..97558d1f9ef6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -835,21 +835,27 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; -static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (x86_match_cpu(cpu_no_meltdown)) - return false; + return; if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) - return false; + return; - return true; + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } /* @@ -898,12 +904,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (!x86_match_cpu(cpu_no_speculation)) { - if (cpu_vulnerable_to_meltdown(c)) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - } + cpu_set_bug_bits(c); fpu__init_system(c); From 96df48c0c42c6816d5b2808ed9e18a428cbf9598 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:33:49 -0700 Subject: [PATCH 050/519] x86/bugs: Concentrate bug reporting into a separate function commit d1059518b4789cabe34bb4b714d07e6089c82ca1 upstream Those SysFS functions have a similar preamble, as such make common code to handle them. Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 48 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b294fdc0faf2..75f3d4974102 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -314,30 +314,48 @@ static void __init spectre_v2_select_mitigation(void) #undef pr_fmt #ifdef CONFIG_SYSFS + +ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) +{ + if (!boot_cpu_has_bug(bug)) + return sprintf(buf, "Not affected\n"); + + switch (bug) { + case X86_BUG_CPU_MELTDOWN: + if (boot_cpu_has(X86_FEATURE_KAISER)) + return sprintf(buf, "Mitigation: PTI\n"); + + break; + + case X86_BUG_SPECTRE_V1: + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + + default: + break; + } + + return sprintf(buf, "Vulnerable\n"); +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) - return sprintf(buf, "Not affected\n"); - if (boot_cpu_has(X86_FEATURE_KAISER)) - return sprintf(buf, "Mitigation: PTI\n"); - return sprintf(buf, "Vulnerable\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); } ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); } #endif From 51f37b2f0248911465d8f84fb6f547be5316a261 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:33:57 -0700 Subject: [PATCH 051/519] x86/bugs: Read SPEC_CTRL MSR during boot and re-use reserved bits commit 1b86883ccb8d5d9506529d42dbe1a5257cb30b18 upstream The 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to all the other bits as reserved. The Intel SDM glossary defines reserved as implementation specific - aka unknown. As such at bootup this must be taken it into account and proper masking for the bits in use applied. A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199511 [ tglx: Made x86_spec_ctrl_base __ro_after_init ] [ Srivatsa: Removed __ro_after_init for 4.4.y ] Suggested-by: Jon Masters Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 24 ++++++++++++++++++++---- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 6403016d3445..daec31829827 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -172,6 +172,17 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * The Intel specification for the SPEC_CTRL MSR requires that we + * preserve any already set reserved bits at boot time (e.g. for + * future additions that this kernel is not currently aware of). + * We then set any additional mitigation bits that we want + * ourselves and always use this as the base for SPEC_CTRL. + * We also use this when handling guest entry/exit as below. + */ +extern void x86_spec_ctrl_set(u64); +extern u64 x86_spec_ctrl_get_default(void); + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; @@ -208,8 +219,9 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } /* @@ -220,14 +232,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_get_default(); \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 75f3d4974102..42c22042f863 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,6 +27,12 @@ static void __init spectre_v2_select_mitigation(void); +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +static u64 x86_spec_ctrl_base; + void __init check_bugs(void) { identify_boot_cpu(); @@ -36,6 +42,13 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); @@ -94,6 +107,20 @@ static const char *spectre_v2_strings[] = { static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +void x86_spec_ctrl_set(u64 val) +{ + if (val & ~SPEC_CTRL_IBRS) + WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); + else + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); + +u64 x86_spec_ctrl_get_default(void) +{ + return x86_spec_ctrl_base; +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); #ifdef RETPOLINE static bool spectre_v2_bad_module; From 3e1ec1698244de1b808ae0142dd653e5aded91d7 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:34:05 -0700 Subject: [PATCH 052/519] x86/bugs, KVM: Support the combination of guest and host IBRS commit 5cf687548705412da47c9cec342fd952d71ed3d5 upstream A guest may modify the SPEC_CTRL MSR from the value used by the kernel. Since the kernel doesn't use IBRS, this means a value of zero is what is needed in the host. But the 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to the other bits as reserved so the kernel should respect the boot time SPEC_CTRL value and use that. This allows to deal with future extensions to the SPEC_CTRL interface if any at all. Note: This uses wrmsrl() instead of native_wrmsl(). I does not make any difference as paravirt will over-write the callq *0xfff.. with the wrmsrl assembler code. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman [ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ] Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 10 ++++++++++ arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index daec31829827..11db69a965dc 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -183,6 +183,16 @@ enum spectre_v2_mitigation { extern void x86_spec_ctrl_set(u64); extern u64 x86_spec_ctrl_get_default(void); +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ +extern void x86_spec_ctrl_set_guest(u64); +extern void x86_spec_ctrl_restore_host(u64); + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 42c22042f863..e71e28154e1a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -122,6 +122,24 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) +{ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; + if (x86_spec_ctrl_base != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); + +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) +{ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; + if (x86_spec_ctrl_base != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); + #ifdef RETPOLINE static bool spectre_v2_bad_module; From 498efb90b8ad36de4a51a2298887acbfc3cab616 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 14 Jul 2018 02:34:13 -0700 Subject: [PATCH 053/519] x86/cpu: Rename Merrifield2 to Moorefield commit f5fbf848303c8704d0e1a1e7cabd08fd0a49552f upstream Merrifield2 is actually Moorefield. Rename it accordingly and drop tail digit from Merrifield1. Signed-off-by: Andy Shevchenko Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160906184254.94440-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/intel-family.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 12fa187865c2..0b27c1ebd731 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -58,8 +58,8 @@ #define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ -#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ #define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A From 05f8bae8e6b68ee20a26ec6a0683b1bcc31aef28 Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Sat, 14 Jul 2018 02:34:22 -0700 Subject: [PATCH 054/519] x86/cpu/intel: Add Knights Mill to Intel family commit 0047f59834e5947d45f34f5f12eb330d158f700b upstream Add CPUID of Knights Mill (KNM) processor to Intel family list. Signed-off-by: Piotr Luc Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161012180520.30976-1-piotr.luc@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0b27c1ebd731..e13ff5a14633 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -67,5 +67,6 @@ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ #endif /* _ASM_X86_INTEL_FAMILY_H */ From d8067aba239cbd2bfd64cdd548a914b20c58d189 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:34:31 -0700 Subject: [PATCH 055/519] x86/bugs: Expose /sys/../spec_store_bypass commit c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf upstream Add the sysfs file for the new vulerability. It does not do much except show the words 'Vulnerable' for recent x86 cores. Intel cores prior to family 6 are known not to be vulnerable, and so are some Atoms and some Xeon Phi. It assumes that older Cyrix, Centaur, etc. cores are immune. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 5 ++++ arch/x86/kernel/cpu/common.c | 23 +++++++++++++++++++ drivers/base/cpu.c | 8 +++++++ include/linux/cpu.h | 2 ++ 6 files changed, 40 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index ea6a043f5beb..50f95689ab38 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -276,6 +276,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index bc76bf39bb2f..08cf4f7a3f7d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -315,5 +315,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e71e28154e1a..0ad13b10afcc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -403,4 +403,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c { return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 97558d1f9ef6..eb78ddf13f44 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -835,10 +835,33 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_CENTAUR, 5, }, + { X86_VENDOR_INTEL, 5, }, + { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_ANY, 4, }, + {} +}; + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (!x86_match_cpu(cpu_no_spec_store_bypass)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + if (x86_match_cpu(cpu_no_speculation)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 3db71afbba93..143edea1076f 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -518,14 +518,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, + &dev_attr_spec_store_bypass.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7e04bcd9af8e..2f9d12022100 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -46,6 +46,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 1cdf94bc21610ffbabedd5b6d85700ed1017037d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:34:39 -0700 Subject: [PATCH 056/519] x86/cpufeatures: Add X86_FEATURE_RDS commit 0cc5fa00b0a88dad140b4e5c2cead9951ad36822 upstream Add the CPU feature bit CPUID.7.0.EDX[31] which indicates whether the CPU supports Reduced Data Speculation. [ tglx: Split it out from a later patch ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 08cf4f7a3f7d..3fce65d4de78 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -297,6 +297,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ /* * BUG word(s) From 46ea6e547d0595f88086bc56c2f032b0e2f3f9ac Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:34:47 -0700 Subject: [PATCH 057/519] x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation commit 24f7fc83b9204d20f878c57cb77d261ae825e033 upstream Contemporary high performance processors use a common industry-wide optimization known as "Speculative Store Bypass" in which loads from addresses to which a recent store has occurred may (speculatively) see an older value. Intel refers to this feature as "Memory Disambiguation" which is part of their "Smart Memory Access" capability. Memory Disambiguation can expose a cache side-channel attack against such speculatively read values. An attacker can create exploit code that allows them to read memory outside of a sandbox environment (for example, malicious JavaScript in a web page), or to perform more complex attacks against code running within the same privilege level, e.g. via the stack. As a first step to mitigate against such attacks, provide two boot command line control knobs: nospec_store_bypass_disable spec_store_bypass_disable=[off,auto,on] By default affected x86 processors will power on with Speculative Store Bypass enabled. Hence the provided kernel parameters are written from the point of view of whether to enable a mitigation or not. The parameters are as follows: - auto - Kernel detects whether your CPU model contains an implementation of Speculative Store Bypass and picks the most appropriate mitigation. - on - disable Speculative Store Bypass - off - enable Speculative Store Bypass [ tglx: Reordered the checks so that the whole evaluation is not done when the CPU does not support RDS ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 33 +++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 6 ++ arch/x86/kernel/cpu/bugs.c | 103 +++++++++++++++++++++++++++ 4 files changed, 143 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e60d0b5809c1..dc138b8d9ecb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2460,6 +2460,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. allow data leaks with this option, which is equivalent to spectre_v2=off. + nospec_store_bypass_disable + [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3623,6 +3626,36 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Not specifying this option is equivalent to spectre_v2=auto. + spec_store_bypass_disable= + [HW] Control Speculative Store Bypass (SSB) Disable mitigation + (Speculative Store Bypass vulnerability) + + Certain CPUs are vulnerable to an exploit against a + a common industry wide performance optimization known + as "Speculative Store Bypass" in which recent stores + to the same memory location may not be observed by + later loads during speculative execution. The idea + is that such stores are unlikely and that they can + be detected prior to instruction retirement at the + end of a particular speculation execution window. + + In vulnerable processors, the speculatively forwarded + store can be used in a cache side channel attack, for + example to read memory to which the attacker does not + directly have access (e.g. inside sandboxed code). + + This parameter controls whether the Speculative Store + Bypass optimization is used. + + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3fce65d4de78..9510f5f075c6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,6 +203,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 11db69a965dc..c786d01faf51 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -193,6 +193,12 @@ extern u64 x86_spec_ctrl_get_default(void); extern void x86_spec_ctrl_set_guest(u64); extern void x86_spec_ctrl_restore_host(u64); +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, +}; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0ad13b10afcc..826aa81a1b37 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -26,6 +26,7 @@ #include static void __init spectre_v2_select_mitigation(void); +static void __init ssb_select_mitigation(void); /* * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any @@ -52,6 +53,12 @@ void __init check_bugs(void) /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability. + */ + ssb_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -356,6 +363,99 @@ static void __init spectre_v2_select_mitigation(void) } } +#undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, +}; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ +}; + +static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + return SPEC_STORE_BYPASS_CMD_NONE; + } else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + + if (!boot_cpu_has(X86_FEATURE_RDS)) + return mode; + + cmd = ssb_parse_cmdline(); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + if (mode != SPEC_STORE_BYPASS_NONE) + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + return mode; +} + +static void ssb_select_mitigation() +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + #undef pr_fmt #ifdef CONFIG_SYSFS @@ -382,6 +482,9 @@ ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); + case X86_BUG_SPEC_STORE_BYPASS: + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + default: break; } From 7dc950c1ce909c11c3985802b1aba6b655d8dc23 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:34:55 -0700 Subject: [PATCH 058/519] x86/bugs/intel: Set proper CPU features and setup RDS commit 772439717dbf703b39990be58d8d4e3e4ad0598a upstream Intel CPUs expose methods to: - Detect whether RDS capability is available via CPUID.7.0.EDX[31], - The SPEC_CTRL MSR(0x48), bit 2 set to enable RDS. - MSR_IA32_ARCH_CAPABILITIES, Bit(4) no need to enable RRS. With that in mind if spec_store_bypass_disable=[auto,on] is selected set at boot-time the SPEC_CTRL MSR to enable RDS if the platform requires it. Note that this does not fix the KVM case where the SPEC_CTRL is exposed to guests which can muck with it, see patch titled : KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS. And for the firmware (IBRS to be set), see patch titled: x86/spectre_v2: Read SPEC_CTRL MSR during boot and re-use reserved bits [ tglx: Distangled it from the intel implementation and kept the call order ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 10 ++++++---- arch/x86/kernel/cpu/cpu.h | 3 +++ arch/x86/kernel/cpu/intel.c | 1 + 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index f4701f0e613a..a29edb723431 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -35,6 +35,7 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -56,6 +57,11 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_RDS_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Reduced Data Speculation control + * required. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 826aa81a1b37..56b84a5506d7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -116,7 +116,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { - if (val & ~SPEC_CTRL_IBRS) + if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); else wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); @@ -443,8 +443,28 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) break; } - if (mode != SPEC_STORE_BYPASS_NONE) + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode != SPEC_STORE_BYPASS_NONE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_RDS; + x86_spec_ctrl_set(SPEC_CTRL_RDS); + break; + case X86_VENDOR_AMD: + break; + } + } + return mode; } @@ -458,6 +478,12 @@ static void ssb_select_mitigation() #undef pr_fmt +void x86_spec_ctrl_setup_ap(void) +{ + if (boot_cpu_has(X86_FEATURE_IBRS)) + x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); +} + #ifdef CONFIG_SYSFS ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eb78ddf13f44..2f1d403cb0ea 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -859,7 +859,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (!x86_match_cpu(cpu_no_spec_store_bypass)) + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + if (!x86_match_cpu(cpu_no_spec_store_bypass) && + !(ia32_cap & ARCH_CAP_RDS_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) @@ -871,9 +875,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (x86_match_cpu(cpu_no_meltdown)) return; - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) return; @@ -1216,6 +1217,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) enable_sep_cpu(); #endif mtrr_ap_init(); + x86_spec_ctrl_setup_ap(); } struct msr_range { diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2584265d4745..3b19d82f7932 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); + +extern void x86_spec_ctrl_setup_ap(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77d9f6809b05..ac25d1e5e8e8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -119,6 +119,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_RDS); } /* From d9a58c4316857347b0ef77e94bde43379c87a746 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:35:03 -0700 Subject: [PATCH 059/519] x86/bugs: Whitelist allowed SPEC_CTRL MSR values commit 1115a859f33276fe8afb31c60cf9d8e657872558 upstream Intel and AMD SPEC_CTRL (0x48) MSR semantics may differ in the future (or in fact use different MSRs for the same functionality). As such a run-time mechanism is required to whitelist the appropriate MSR values. [ tglx: Made the variable __ro_after_init ] [ Srivatsa: Removed __ro_after_init for 4.4.y ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 56b84a5506d7..c37e2110d383 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,6 +34,12 @@ static void __init ssb_select_mitigation(void); */ static u64 x86_spec_ctrl_base; +/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; + void __init check_bugs(void) { identify_boot_cpu(); @@ -116,7 +122,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { - if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) + if (val & x86_spec_ctrl_mask) WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); else wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); @@ -458,6 +464,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_RDS; + x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; x86_spec_ctrl_set(SPEC_CTRL_RDS); break; case X86_VENDOR_AMD: @@ -481,7 +488,7 @@ static void ssb_select_mitigation() void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) - x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); + x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); } #ifdef CONFIG_SYSFS From ec5bf1a308faac133951877c8b5fbbb0413529cb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Jul 2018 02:35:12 -0700 Subject: [PATCH 060/519] x86/bugs/AMD: Add support to disable RDS on Fam[15, 16, 17]h if requested commit 764f3c21588a059cd783c6ba0734d4db2d72822d upstream AMD does not need the Speculative Store Bypass mitigation to be enabled. The parameters for this are already available and can be done via MSR C001_1020. Each family uses a different bit in that MSR for this. [ tglx: Expose the bit mask via a variable and move the actual MSR fiddling into the bugs code as that's the right thing to do and also required to prepare for dynamic enable/disable ] [ Srivatsa: Removed __ro_after_init for 4.4.y ] Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 4 ++++ arch/x86/kernel/cpu/amd.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 ++++ 5 files changed, 61 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9510f5f075c6..b7cdd1c05132 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,6 +204,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c786d01faf51..ac2fdc9666e4 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -199,6 +199,10 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_DISABLE, }; +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_rds_mask; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9b2941422b68..4452f387ed32 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -519,6 +520,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable RDS. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_RDS); + setup_force_cpu_cap(X86_FEATURE_AMD_RDS); + x86_amd_ls_cfg_rds_mask = 1ULL << bit; + } + } } static void early_init_amd(struct cpuinfo_x86 *c) @@ -794,6 +815,11 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { + set_cpu_cap(c, X86_FEATURE_RDS); + set_cpu_cap(c, X86_FEATURE_AMD_RDS); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c37e2110d383..b8911afbd6b8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -40,6 +40,13 @@ static u64 x86_spec_ctrl_base; */ static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; +/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). + */ +u64 x86_amd_ls_cfg_base; +u64 x86_amd_ls_cfg_rds_mask; + void __init check_bugs(void) { identify_boot_cpu(); @@ -51,7 +58,8 @@ void __init check_bugs(void) /* * Read the SPEC_CTRL MSR to account for reserved bits which may - * have unknown values. + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. */ if (boot_cpu_has(X86_FEATURE_IBRS)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); @@ -153,6 +161,14 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); +static void x86_amd_rds_enable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + + if (boot_cpu_has(X86_FEATURE_AMD_RDS)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -442,6 +458,11 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: + /* + * AMD platforms by default don't need SSB mitigation. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; break; @@ -468,6 +489,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) x86_spec_ctrl_set(SPEC_CTRL_RDS); break; case X86_VENDOR_AMD: + x86_amd_rds_enable(); break; } } @@ -489,6 +511,9 @@ void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_rds_enable(); } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2f1d403cb0ea..7405c8653f7a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -851,6 +851,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { { X86_VENDOR_CENTAUR, 5, }, { X86_VENDOR_INTEL, 5, }, { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, { X86_VENDOR_ANY, 4, }, {} }; From 49d8e36618f7524611409b8608dd54d399e7097f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:35:20 -0700 Subject: [PATCH 061/519] x86/speculation: Create spec-ctrl.h to avoid include hell commit 28a2775217b17208811fa43a9e96bd1fdf417b86 upstream Having everything in nospec-branch.h creates a hell of dependencies when adding the prctl based switching mechanism. Move everything which is not required in nospec-branch.h to spec-ctrl.h and fix up the includes in the relevant files. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 14 -------------- arch/x86/include/asm/spec-ctrl.h | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 6 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 arch/x86/include/asm/spec-ctrl.h diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ac2fdc9666e4..47c454c29535 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -183,26 +183,12 @@ enum spectre_v2_mitigation { extern void x86_spec_ctrl_set(u64); extern u64 x86_spec_ctrl_get_default(void); -/* - * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR - * the guest has, while on VMEXIT we restore the host view. This - * would be easier if SPEC_CTRL were architecturally maskable or - * shadowable for guests but this is not (currently) the case. - * Takes the guest view of SPEC_CTRL MSR as a parameter. - */ -extern void x86_spec_ctrl_set_guest(u64); -extern void x86_spec_ctrl_restore_host(u64); - /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, }; -/* AMD specific Speculative Store Bypass MSR data */ -extern u64 x86_amd_ls_cfg_base; -extern u64 x86_amd_ls_cfg_rds_mask; - extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..3ad64420a06e --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ +extern void x86_spec_ctrl_set_guest(u64); +extern void x86_spec_ctrl_restore_host(u64); + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_rds_mask; + +#endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4452f387ed32..14e984916f2f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b8911afbd6b8..47a3cc08a889 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 42654375b73f..df7827a981dd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include "trace.h" diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 63c44a9bf6bb..18143886b186 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include "trace.h" #include "pmu.h" From 13fa2c65c9a8c2cd5f2a9799891582c40b6f5cfa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:35:28 -0700 Subject: [PATCH 062/519] prctl: Add speculation control prctls commit b617cfc858161140d69cc0b5cc211996b557a1c7 upstream Add two new prctls to control aspects of speculation related vulnerabilites and their mitigations to provide finer grained control over performance impacting mitigations. PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature which is selected with arg2 of prctl(2). The return value uses bit 0-2 with the following meaning: Bit Define Description 0 PR_SPEC_PRCTL Mitigation can be controlled per task by PR_SET_SPECULATION_CTRL 1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is disabled 2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is enabled If all bits are 0 the CPU is not affected by the speculation misfeature. If PR_SPEC_PRCTL is set, then the per task control of the mitigation is available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation misfeature will fail. PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of prctl(2) per task. arg3 is used to hand in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. The common return values are: EINVAL prctl is not implemented by the architecture or the unused prctl() arguments are not 0 ENODEV arg2 is selecting a not supported speculation misfeature PR_SET_SPECULATION_CTRL has these additional return values: ERANGE arg3 is incorrect, i.e. it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE ENXIO prctl control of the selected speculation misfeature is disabled The first supported controlable speculation misfeature is PR_SPEC_STORE_BYPASS. Add the define so this can be shared between architectures. Based on an initial patch from Tim Chen and mostly rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 86 +++++++++++++++++++++++++++++++++++++ include/linux/nospec.h | 5 +++ include/uapi/linux/prctl.h | 11 +++++ kernel/sys.c | 20 +++++++++ 4 files changed, 122 insertions(+) create mode 100644 Documentation/spec_ctrl.txt diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt new file mode 100644 index 000000000000..ddbebcd01208 --- /dev/null +++ b/Documentation/spec_ctrl.txt @@ -0,0 +1,86 @@ +=================== +Speculation Control +=================== + +Quite some CPUs have speculation related misfeatures which are in fact +vulnerabilites causing data leaks in various forms even accross privilege +domains. + +The kernel provides mitigation for such vulnerabilities in various +forms. Some of these mitigations are compile time configurable and some on +the kernel command line. + +There is also a class of mitigations which are very expensive, but they can +be restricted to a certain set of processes or tasks in controlled +environments. The mechanism to control these mitigations is via +:manpage:`prctl(2)`. + +There are two prctl options which are related to this: + + * PR_GET_SPECULATION_CTRL + + * PR_SET_SPECULATION_CTRL + +PR_GET_SPECULATION_CTRL +----------------------- + +PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature +which is selected with arg2 of prctl(2). The return value uses bits 0-2 with +the following meaning: + +==== ================ =================================================== +Bit Define Description +==== ================ =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled +==== ================ =================================================== + +If all bits are 0 the CPU is not affected by the speculation misfeature. + +If PR_SPEC_PRCTL is set, then the per task control of the mitigation is +available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation +misfeature will fail. + +PR_SET_SPECULATION_CTRL +----------------------- +PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which +is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand +in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. + +Common error codes +------------------ +======= ================================================================= +Value Meaning +======= ================================================================= +EINVAL The prctl is not implemented by the architecture or unused + prctl(2) arguments are not 0 + +ENODEV arg2 is selecting a not supported speculation misfeature +======= ================================================================= + +PR_SET_SPECULATION_CTRL error codes +----------------------------------- +======= ================================================================= +Value Meaning +======= ================================================================= +0 Success + +ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor + PR_SPEC_DISABLE + +ENXIO Control of the selected speculation misfeature is not possible. + See PR_GET_SPECULATION_CTRL. +======= ================================================================= + +Speculation misfeature controls +------------------------------- +- PR_SPEC_STORE_BYPASS: Speculative Store Bypass + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); diff --git a/include/linux/nospec.h b/include/linux/nospec.h index e791ebc65c9c..700bb8a4e4ea 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, \ (typeof(_i)) (_i & _mask); \ }) + +/* Speculation control prctl */ +int arch_prctl_spec_ctrl_get(unsigned long which); +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); + #endif /* _LINUX_NOSPEC_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index a8d0759a9e40..3b316be71c56 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -197,4 +197,15 @@ struct prctl_mm_map { # define PR_CAP_AMBIENT_LOWER 3 # define PR_CAP_AMBIENT_CLEAR_ALL 4 +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 6624919ef0e7..d80c33f9aff7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2075,6 +2075,16 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif +int __weak arch_prctl_spec_ctrl_get(unsigned long which) +{ + return -EINVAL; +} + +int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +{ + return -EINVAL; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2269,6 +2279,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; + case PR_GET_SPECULATION_CTRL: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_get(arg2); + break; + case PR_SET_SPECULATION_CTRL: + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_set(arg2, arg3); + break; default: error = -EINVAL; break; From 1a529899c65aff1f4f2f4875e876457d0c2341c5 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Sat, 14 Jul 2018 02:35:36 -0700 Subject: [PATCH 063/519] x86/process: Optimize TIF checks in __switch_to_xtra() commit af8b3cd3934ec60f4c2a420d19a9d416554f140b upstream Help the compiler to avoid reevaluating the thread flags for each checked bit by reordering the bit checks and providing an explicit xor for evaluation. With default defconfigs for each arch, x86_64: arch/x86/kernel/process.o text data bss dec hex 3056 8577 16 11649 2d81 Before 3024 8577 16 11617 2d61 After i386: arch/x86/kernel/process.o text data bss dec hex 2957 8673 8 11638 2d76 Before 2925 8673 8 11606 2d56 After Originally-by: Thomas Gleixner Signed-off-by: Kyle Huey Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com Signed-off-by: Thomas Gleixner [dwmw2: backported to make TIF_RDS handling simpler. No deferred TR reload.] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 66 ++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7c5c5dc90ffa..cc0f28878d09 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -188,48 +188,56 @@ int set_tsc_mode(unsigned int val) return 0; } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +static inline void switch_to_bitmap(struct tss_struct *tss, + struct thread_struct *prev, + struct thread_struct *next, + unsigned long tifp, unsigned long tifn) { - struct thread_struct *prev, *next; - - prev = &prev_p->thread; - next = &next_p->thread; - - if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ - test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) - debugctl |= DEBUGCTLMSR_BTF; - - update_debugctlmsr(debugctl); - } - - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } - - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. * Normally this is 128 bytes or less: */ memcpy(tss->io_bitmap, next->io_bitmap_ptr, max(prev->io_bitmap_max, next->io_bitmap_max)); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { + } else if (tifp & _TIF_IO_BITMAP) { /* * Clear any possible leftover bits: */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } +} + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long tifp, tifn; + + prev = &prev_p->thread; + next = &next_p->thread; + + tifn = READ_ONCE(task_thread_info(next_p)->flags); + tifp = READ_ONCE(task_thread_info(prev_p)->flags); + switch_to_bitmap(tss, prev, next, tifp, tifn); + propagate_user_return_notify(prev_p, next_p); + + if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + if (tifn & _TIF_BLOCKSTEP) + debugctl |= DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + } + + if ((tifp ^ tifn) & _TIF_NOTSC) { + if (tifn & _TIF_NOTSC) + hard_disable_TSC(); + else + hard_enable_TSC(); + } } /* From 5c5e95c4e50fbec4f101e057520a762662d6e7d7 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Sat, 14 Jul 2018 02:35:44 -0700 Subject: [PATCH 064/519] x86/process: Correct and optimize TIF_BLOCKSTEP switch commit b9894a2f5bd18b1691cb6872c9afe32b148d0132 upstream The debug control MSR is "highly magical" as the blockstep bit can be cleared by hardware under not well documented circumstances. So a task switch relying on the bit set by the previous task (according to the previous tasks thread flags) can trip over this and not update the flag for the next task. To fix this its required to handle DEBUGCTLMSR_BTF when either the previous or the next or both tasks have the TIF_BLOCKSTEP flag set. While at it avoid branching within the TIF_BLOCKSTEP case and evaluating boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR. x86_64: arch/x86/kernel/process.o text data bss dec hex 3024 8577 16 11617 2d61 Before 3008 8577 16 11601 2d51 After i386: No change [ tglx: Made the shift value explicit, use a local variable to make the code readable and massaged changelog] Originally-by: Thomas Gleixner Signed-off-by: Kyle Huey Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/process.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a29edb723431..71a2c84013b3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -150,6 +150,7 @@ /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cc0f28878d09..166aef38ec98 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -223,13 +223,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, propagate_user_return_notify(prev_p, next_p); - if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { - unsigned long debugctl = get_debugctlmsr(); + if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && + arch_has_block_step()) { + unsigned long debugctl, msk; + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl &= ~DEBUGCTLMSR_BTF; - if (tifn & _TIF_BLOCKSTEP) - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); + msk = tifn & _TIF_BLOCKSTEP; + debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } if ((tifp ^ tifn) & _TIF_NOTSC) { From a1cb23a5e2ea4ed75d3ac37c6a0739c5435406ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:35:52 -0700 Subject: [PATCH 065/519] x86/process: Optimize TIF_NOTSC switch commit 5a920155e388ec22a22e0532fb695b9215c9b34d upstream Provide and use a toggle helper instead of doing it with a branch. x86_64: arch/x86/kernel/process.o text data bss dec hex 3008 8577 16 11601 2d51 Before 2976 8577 16 11569 2d31 After i386: arch/x86/kernel/process.o text data bss dec hex 2925 8673 8 11606 2d56 Before 2893 8673 8 11574 2d36 After Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-4-khuey@kylehuey.com Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 10 ++++++++++ arch/x86/kernel/process.c | 22 ++++------------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8ce07db77299..72cfe3e53af1 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -111,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask) } } +static inline void cr4_toggle_bits(unsigned long mask) +{ + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); + cr4 ^= mask; + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Read the CR4 shadow. */ static inline unsigned long cr4_read_shadow(void) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 166aef38ec98..d112963902fe 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -130,11 +130,6 @@ void flush_thread(void) fpu__clear(&tsk->thread.fpu); } -static void hard_disable_TSC(void) -{ - cr4_set_bits(X86_CR4_TSD); -} - void disable_TSC(void) { preempt_disable(); @@ -143,15 +138,10 @@ void disable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_disable_TSC(); + cr4_set_bits(X86_CR4_TSD); preempt_enable(); } -static void hard_enable_TSC(void) -{ - cr4_clear_bits(X86_CR4_TSD); -} - static void enable_TSC(void) { preempt_disable(); @@ -160,7 +150,7 @@ static void enable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_enable_TSC(); + cr4_clear_bits(X86_CR4_TSD); preempt_enable(); } @@ -234,12 +224,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } - if ((tifp ^ tifn) & _TIF_NOTSC) { - if (tifn & _TIF_NOTSC) - hard_disable_TSC(); - else - hard_enable_TSC(); - } + if ((tifp ^ tifn) & _TIF_NOTSC) + cr4_toggle_bits(X86_CR4_TSD); } /* From b04a020d0745a7ba18800e86ea678676aeb21278 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:36:00 -0700 Subject: [PATCH 066/519] x86/process: Allow runtime control of Speculative Store Bypass commit 885f82bfbc6fefb6664ea27965c3ab9ac4194b8c upstream The Speculative Store Bypass vulnerability can be mitigated with the Reduced Data Speculation (RDS) feature. To allow finer grained control of this eventually expensive mitigation a per task mitigation control is required. Add a new TIF_RDS flag and put it into the group of TIF flags which are evaluated for mismatch in switch_to(). If these bits differ in the previous and the next task, then the slow path function __switch_to_xtra() is invoked. Implement the TIF_RDS dependent mitigation control in the slow path. If the prctl for controlling Speculative Store Bypass is disabled or no task uses the prctl then there is no overhead in the switch_to() fast path. Update the KVM related speculation control functions to take TID_RDS into account as well. Based on a patch from Tim Chen. Completely rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 3 ++- arch/x86/include/asm/spec-ctrl.h | 17 +++++++++++++++++ arch/x86/include/asm/thread_info.h | 6 ++++-- arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++++----- arch/x86/kernel/process.c | 22 ++++++++++++++++++++++ 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 71a2c84013b3..883cf0d6b1f9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -35,7 +35,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ +#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ +#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 3ad64420a06e..45ef00ad5105 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_SPECCTRL_H_ #define _ASM_X86_SPECCTRL_H_ +#include #include /* @@ -18,4 +19,20 @@ extern void x86_spec_ctrl_restore_host(u64); extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_rds_mask; +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + +static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); + return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); +} + +static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; +} + +extern void speculative_store_bypass_update(void); + #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 18c9aaa8c043..36a49b4ba4b5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,6 +92,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_RDS 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -114,8 +115,9 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_RDS (1 << TIF_RDS) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -147,7 +149,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 47a3cc08a889..0f8303ec0532 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,7 +32,7 @@ static void __init ssb_select_mitigation(void); * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any * writes to SPEC_CTRL contain whatever reserved bits have been set. */ -static u64 x86_spec_ctrl_base; +u64 x86_spec_ctrl_base; /* * The vendor and possibly platform specific bits which can be modified in @@ -139,25 +139,41 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); u64 x86_spec_ctrl_get_default(void) { - return x86_spec_ctrl_base; + u64 msrval = x86_spec_ctrl_base; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + return msrval; } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) { + u64 host = x86_spec_ctrl_base; + if (!boot_cpu_has(X86_FEATURE_IBRS)) return; - if (x86_spec_ctrl_base != guest_spec_ctrl) + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) { + u64 host = x86_spec_ctrl_base; + if (!boot_cpu_has(X86_FEATURE_IBRS)) return; - if (x86_spec_ctrl_base != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, host); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d112963902fe..9689e92e72dd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -198,6 +199,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + u64 msr; + + if (static_cpu_has(X86_FEATURE_AMD_RDS)) { + msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + } else { + msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } +} + +void speculative_store_bypass_update(void) +{ + __speculative_store_bypass_update(current_thread_info()->flags); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -226,6 +245,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOTSC) cr4_toggle_bits(X86_CR4_TSD); + + if ((tifp ^ tifn) & _TIF_RDS) + __speculative_store_bypass_update(tifn); } /* From 2cb00ce1273d48dafce848f4e0ea353eb5839475 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:36:09 -0700 Subject: [PATCH 067/519] x86/speculation: Add prctl for Speculative Store Bypass mitigation commit a73ec77ee17ec556fe7f165d00314cb7c047b1ac upstream Add prctl based control for Speculative Store Bypass mitigation and make it the default mitigation for Intel and AMD. Andi Kleen provided the following rationale (slightly redacted): There are multiple levels of impact of Speculative Store Bypass: 1) JITed sandbox. It cannot invoke system calls, but can do PRIME+PROBE and may have call interfaces to other code 2) Native code process. No protection inside the process at this level. 3) Kernel. 4) Between processes. The prctl tries to protect against case (1) doing attacks. If the untrusted code can do random system calls then control is already lost in a much worse way. So there needs to be system call protection in some way (using a JIT not allowing them or seccomp). Or rather if the process can subvert its environment somehow to do the prctl it can already execute arbitrary code, which is much worse than SSB. To put it differently, the point of the prctl is to not allow JITed code to read data it shouldn't read from its JITed sandbox. If it already has escaped its sandbox then it can already read everything it wants in its address space, and do much worse. The ability to control Speculative Store Bypass allows to enable the protection selectively without affecting overall system performance. Based on an initial patch from Tim Chen. Completely rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 6 +- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 83 ++++++++++++++++++++++++---- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dc138b8d9ecb..80202debbdbe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3651,7 +3651,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. off - Unconditionally enable Speculative Store Bypass auto - Kernel detects whether the CPU model contains an implementation of Speculative Store Bypass and - picks the most appropriate mitigation + picks the most appropriate mitigation. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. Not specifying this option is equivalent to spec_store_bypass_disable=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 47c454c29535..155d955ab801 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -187,6 +187,7 @@ extern u64 x86_spec_ctrl_get_default(void); enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, }; extern char __indirect_thunk_start[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0f8303ec0532..bcfccd3d6542 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -411,20 +413,23 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_NONE, SPEC_STORE_BYPASS_CMD_AUTO, SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, }; static const char *ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", - [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" }; static const struct { const char *option; enum ssb_mitigation_cmd cmd; } ssb_mitigation_options[] = { - { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ - { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ - { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ }; static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) @@ -474,14 +479,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: - /* - * AMD platforms by default don't need SSB mitigation. - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - break; + /* Choose prctl as the default mode */ + mode = SPEC_STORE_BYPASS_PRCTL; + break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; case SPEC_STORE_BYPASS_CMD_NONE: break; } @@ -492,7 +498,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation */ - if (mode != SPEC_STORE_BYPASS_NONE) { + if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses @@ -523,6 +529,63 @@ static void ssb_select_mitigation() #undef pr_fmt +static int ssb_prctl_set(unsigned long ctrl) +{ + bool rds = !!test_tsk_thread_flag(current, TIF_RDS); + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + return -ENXIO; + + if (ctrl == PR_SPEC_ENABLE) + clear_tsk_thread_flag(current, TIF_RDS); + else + set_tsk_thread_flag(current, TIF_RDS); + + if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) + speculative_store_bypass_update(); + + return 0; +} + +static int ssb_prctl_get(void) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_PRCTL: + if (test_tsk_thread_flag(current, TIF_RDS)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +{ + if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) + return -ERANGE; + + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(ctrl); + default: + return -ENODEV; + } +} + +int arch_prctl_spec_ctrl_get(unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(); + default: + return -ENODEV; + } +} + void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) From b6f4a6285d7979b45d629e65c880279930b98ef1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:36:17 -0700 Subject: [PATCH 068/519] nospec: Allow getting/setting on non-current task commit 7bbf1373e228840bb0295a2ca26d548ef37f448e upstream Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than current. This is needed both for /proc/$pid/status queries and for seccomp (since thread-syncing can trigger seccomp in non-current threads). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++----------- include/linux/nospec.h | 7 +++++-- kernel/sys.c | 9 +++++---- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bcfccd3d6542..64b54a4c30f5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -529,31 +529,35 @@ static void ssb_select_mitigation() #undef pr_fmt -static int ssb_prctl_set(unsigned long ctrl) +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { - bool rds = !!test_tsk_thread_flag(current, TIF_RDS); + bool rds = !!test_tsk_thread_flag(task, TIF_RDS); if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) return -ENXIO; if (ctrl == PR_SPEC_ENABLE) - clear_tsk_thread_flag(current, TIF_RDS); + clear_tsk_thread_flag(task, TIF_RDS); else - set_tsk_thread_flag(current, TIF_RDS); + set_tsk_thread_flag(task, TIF_RDS); - if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ + if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) speculative_store_bypass_update(); return 0; } -static int ssb_prctl_get(void) +static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_PRCTL: - if (test_tsk_thread_flag(current, TIF_RDS)) + if (test_tsk_thread_flag(task, TIF_RDS)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; default: @@ -563,24 +567,25 @@ static int ssb_prctl_get(void) } } -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) { if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) return -ERANGE; switch (which) { case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(ctrl); + return ssb_prctl_set(task, ctrl); default: return -ENODEV; } } -int arch_prctl_spec_ctrl_get(unsigned long which) +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: - return ssb_prctl_get(); + return ssb_prctl_get(task); default: return -ENODEV; } diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 700bb8a4e4ea..a908c954484d 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -7,6 +7,8 @@ #define _LINUX_NOSPEC_H #include +struct task_struct; + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, }) /* Speculation control prctl */ -int arch_prctl_spec_ctrl_get(unsigned long which); -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); #endif /* _LINUX_NOSPEC_H */ diff --git a/kernel/sys.c b/kernel/sys.c index d80c33f9aff7..f718742e55e6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2075,12 +2075,13 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif -int __weak arch_prctl_spec_ctrl_get(unsigned long which) +int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) { return -EINVAL; } -int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, + unsigned long ctrl) { return -EINVAL; } @@ -2282,12 +2283,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_prctl_spec_ctrl_get(arg2); + error = arch_prctl_spec_ctrl_get(me, arg2); break; case PR_SET_SPECULATION_CTRL: if (arg4 || arg5) return -EINVAL; - error = arch_prctl_spec_ctrl_set(arg2, arg3); + error = arch_prctl_spec_ctrl_set(me, arg2, arg3); break; default: error = -EINVAL; From 484964fa3e5a0d8467891aab8368dab34e8eb13c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:36:25 -0700 Subject: [PATCH 069/519] proc: Provide details on speculation flaw mitigations commit fae1fa0fc6cca8beee3ab8ed71d54f9a78fa3f64 upstream As done with seccomp and no_new_privs, also show speculation flaw mitigation state in /proc/$pid/status. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/proc/array.c b/fs/proc/array.c index b6c00ce0e29e..bb48358b99a3 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include @@ -332,6 +333,28 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); #endif + seq_printf(m, "\nSpeculation Store Bypass:\t"); + switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { + case -EINVAL: + seq_printf(m, "unknown"); + break; + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_ENABLE: + seq_printf(m, "thread vulnerable"); + break; + case PR_SPEC_DISABLE: + seq_printf(m, "globally mitigated"); + break; + default: + seq_printf(m, "vulnerable"); + break; + } + seq_putc(m, '\n'); } static inline void task_context_switch_counts(struct seq_file *m, From 0b1174054e0f4afd999c56ddecbbfb18f598f099 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:36:33 -0700 Subject: [PATCH 070/519] seccomp: Enable speculation flaw mitigations commit 5c3070890d06ff82eecb808d02d2ca39169533ef upstream When speculation flaw mitigations are opt-in (via prctl), using seccomp will automatically opt-in to these protections, since using seccomp indicates at least some level of sandboxing is desired. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index efd384f3f852..bfb1ee845ba6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -214,6 +216,19 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } +/* + * If a given speculation mitigation is opt-in (prctl()-controlled), + * select it, by disabling speculation (enabling mitigation). + */ +static inline void spec_mitigate(struct task_struct *task, + unsigned long which) +{ + int state = arch_prctl_spec_ctrl_get(task, which); + + if (state > 0 && (state & PR_SPEC_PRCTL)) + arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); +} + static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode) { @@ -225,6 +240,8 @@ static inline void seccomp_assign_mode(struct task_struct *task, * filter) is set. */ smp_mb__before_atomic(); + /* Assume seccomp processes want speculation flaw mitigation. */ + spec_mitigate(task, PR_SPEC_STORE_BYPASS); set_tsk_thread_flag(task, TIF_SECCOMP); } From 3f9cb20f9126db1edb1fad78a0e94ff8e9ae94e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:36:41 -0700 Subject: [PATCH 071/519] prctl: Add force disable speculation commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream For certain use cases it is desired to enforce mitigations so they cannot be undone afterwards. That's important for loader stubs which want to prevent a child from disabling the mitigation again. Will also be used for seccomp(). The extra state preserving of the prctl state for SSB is a preparatory step for EBPF dymanic speculation control. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 34 +++++++++++++++++++++------------- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++---------- fs/proc/array.c | 3 +++ include/linux/sched.h | 9 +++++++++ include/uapi/linux/prctl.h | 1 + 5 files changed, 59 insertions(+), 23 deletions(-) diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt index ddbebcd01208..1b3690d30943 100644 --- a/Documentation/spec_ctrl.txt +++ b/Documentation/spec_ctrl.txt @@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL ----------------------- PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature -which is selected with arg2 of prctl(2). The return value uses bits 0-2 with +which is selected with arg2 of prctl(2). The return value uses bits 0-3 with the following meaning: -==== ================ =================================================== -Bit Define Description -==== ================ =================================================== -0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL -1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled -2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled -==== ================ =================================================== +==== ===================== =================================================== +Bit Define Description +==== ===================== =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled +3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A + subsequent prctl(..., PR_SPEC_ENABLE) will fail. +==== ===================== =================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. @@ -47,9 +49,11 @@ misfeature will fail. PR_SET_SPECULATION_CTRL ----------------------- + PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand -in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. +in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or +PR_SPEC_FORCE_DISABLE. Common error codes ------------------ @@ -70,10 +74,13 @@ Value Meaning 0 Success ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor - PR_SPEC_DISABLE + PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE ENXIO Control of the selected speculation misfeature is not possible. See PR_GET_SPECULATION_CTRL. + +EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller + tried to enable it again. ======= ================================================================= Speculation misfeature controls @@ -84,3 +91,4 @@ Speculation misfeature controls * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 64b54a4c30f5..d6897caa8f47 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -531,21 +531,37 @@ static void ssb_select_mitigation() static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { - bool rds = !!test_tsk_thread_flag(task, TIF_RDS); + bool update; if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) return -ENXIO; - if (ctrl == PR_SPEC_ENABLE) - clear_tsk_thread_flag(task, TIF_RDS); - else - set_tsk_thread_flag(task, TIF_RDS); + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + update = test_and_clear_tsk_thread_flag(task, TIF_RDS); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + break; + default: + return -ERANGE; + } /* * If being set on non-current task, delay setting the CPU * mitigation until it is next scheduled. */ - if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) + if (task == current && update) speculative_store_bypass_update(); return 0; @@ -557,7 +573,9 @@ static int ssb_prctl_get(struct task_struct *task) case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_PRCTL: - if (test_tsk_thread_flag(task, TIF_RDS)) + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; default: @@ -570,9 +588,6 @@ static int ssb_prctl_get(struct task_struct *task) int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { - if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) - return -ERANGE; - switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); diff --git a/fs/proc/array.c b/fs/proc/array.c index bb48358b99a3..31414787c971 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -341,6 +341,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) case PR_SPEC_NOT_AFFECTED: seq_printf(m, "not vulnerable"); break; + case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: + seq_printf(m, "thread force mitigated"); + break; case PR_SPEC_PRCTL | PR_SPEC_DISABLE: seq_printf(m, "thread mitigated"); break; diff --git a/include/linux/sched.h b/include/linux/sched.h index 90bea398e5e0..725498cc5d30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2167,6 +2167,8 @@ static inline void memalloc_noio_restore(unsigned int flags) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */ +#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/ #define TASK_PFA_TEST(name, func) \ @@ -2190,6 +2192,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + /* * task->jobctl flags */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 3b316be71c56..64776b72e1eb 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -207,5 +207,6 @@ struct prctl_mm_map { # define PR_SPEC_PRCTL (1UL << 0) # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) #endif /* _LINUX_PRCTL_H */ From a08c3f484c34df1e3bec3c47818d570483bf67fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:36:49 -0700 Subject: [PATCH 072/519] seccomp: Use PR_SPEC_FORCE_DISABLE commit b849a812f7eb92e96d1c8239b06581b2cfd8b275 upstream Use PR_SPEC_FORCE_DISABLE in seccomp() because seccomp does not allow to widen restrictions. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index bfb1ee845ba6..f33539f31bc0 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -226,7 +226,7 @@ static inline void spec_mitigate(struct task_struct *task, int state = arch_prctl_spec_ctrl_get(task, which); if (state > 0 && (state & PR_SPEC_PRCTL)) - arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); + arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); } static inline void seccomp_assign_mode(struct task_struct *task, From c463c0f037f2d83aea54415ed7c61deb0b90333b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:36:57 -0700 Subject: [PATCH 073/519] seccomp: Add filter flag to opt-out of SSB mitigation commit 00a02d0c502a06d15e07b857f8ff921e3e402675 upstream If a seccomp user is not interested in Speculative Store Bypass mitigation by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when adding filters. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/seccomp.h | 3 +- include/uapi/linux/seccomp.h | 4 +- kernel/seccomp.c | 19 +++-- tools/testing/selftests/seccomp/seccomp_bpf.c | 78 ++++++++++++++++++- 4 files changed, 93 insertions(+), 11 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 2296e6b2f690..5a53d34bba26 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_SPEC_ALLOW) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..e4acb615792b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -15,7 +15,9 @@ #define SECCOMP_SET_MODE_FILTER 1 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */ +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) /* * All BPF programs must return a 32-bit value. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f33539f31bc0..4bb8a5a5d68a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -230,7 +230,8 @@ static inline void spec_mitigate(struct task_struct *task, } static inline void seccomp_assign_mode(struct task_struct *task, - unsigned long seccomp_mode) + unsigned long seccomp_mode, + unsigned long flags) { assert_spin_locked(&task->sighand->siglock); @@ -240,8 +241,9 @@ static inline void seccomp_assign_mode(struct task_struct *task, * filter) is set. */ smp_mb__before_atomic(); - /* Assume seccomp processes want speculation flaw mitigation. */ - spec_mitigate(task, PR_SPEC_STORE_BYPASS); + /* Assume default seccomp processes want spec flaw mitigation. */ + if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) + spec_mitigate(task, PR_SPEC_STORE_BYPASS); set_tsk_thread_flag(task, TIF_SECCOMP); } @@ -309,7 +311,7 @@ static inline pid_t seccomp_can_sync_threads(void) * without dropping the locks. * */ -static inline void seccomp_sync_threads(void) +static inline void seccomp_sync_threads(unsigned long flags) { struct task_struct *thread, *caller; @@ -350,7 +352,8 @@ static inline void seccomp_sync_threads(void) * allow one thread to transition the other. */ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) - seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER, + flags); } } @@ -469,7 +472,7 @@ static long seccomp_attach_filter(unsigned int flags, /* Now that the new filter is in place, synchronize to all threads. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC) - seccomp_sync_threads(); + seccomp_sync_threads(flags); return 0; } @@ -764,7 +767,7 @@ static long seccomp_set_mode_strict(void) #ifdef TIF_NOTSC disable_TSC(); #endif - seccomp_assign_mode(current, seccomp_mode); + seccomp_assign_mode(current, seccomp_mode, 0); ret = 0; out: @@ -822,7 +825,7 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Do not free the successfully attached filter. */ prepared = NULL; - seccomp_assign_mode(current, seccomp_mode); + seccomp_assign_mode(current, seccomp_mode, flags); out: spin_unlock_irq(¤t->sighand->siglock); if (flags & SECCOMP_FILTER_FLAG_TSYNC) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 29487e0437ad..b3f345433ec7 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1477,7 +1477,11 @@ TEST_F(TRACE_syscall, syscall_dropped) #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef seccomp @@ -1576,6 +1580,78 @@ TEST(seccomp_syscall_mode_lock) } } +/* + * Test detection of known and unknown filter flags. Userspace needs to be able + * to check if a filter flag is supported by the current kernel and a good way + * of doing that is by attempting to enter filter mode, with the flag bit in + * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates + * that the flag is valid and EINVAL indicates that the flag is invalid. + */ +TEST(detect_seccomp_filter_flags) +{ + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; + unsigned int flag, all_flags; + int i; + long ret; + + /* Test detection of known-good filter flags */ + for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", + flag); + } + + all_flags |= flag; + } + + /* Test detection of all known-good filter flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + all_flags); + } + + /* Test detection of an unknown filter flag */ + flag = -1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", + flag); + } + + /* + * Test detection of an unknown filter flag that may simply need to be + * added to this test + */ + flag = flags[ARRAY_SIZE(flags) - 1] << 1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", + flag); + } +} + TEST(TSYNC_first) { struct sock_filter filter[] = { From 9237a1b0828962191107e702cf56c88db9f9d455 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:37:05 -0700 Subject: [PATCH 074/519] seccomp: Move speculation migitation control to arch code commit 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc upstream The migitation control is simpler to implement in architecture code as it avoids the extra function call to check the mode. Aside of that having an explicit seccomp enabled mode in the architecture mitigations would require even more workarounds. Move it into architecture code and provide a weak function in the seccomp code. Remove the 'which' argument as this allows the architecture to decide which mitigations are relevant for seccomp. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 29 ++++++++++++++++++----------- include/linux/nospec.h | 2 ++ kernel/seccomp.c | 15 ++------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d6897caa8f47..b005ef721dc7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -567,6 +567,24 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_SECCOMP +void arch_seccomp_spec_mitigate(struct task_struct *task) +{ + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); +} +#endif + static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { @@ -585,17 +603,6 @@ static int ssb_prctl_get(struct task_struct *task) } } -int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(task, ctrl); - default: - return -ENODEV; - } -} - int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { diff --git a/include/linux/nospec.h b/include/linux/nospec.h index a908c954484d..0c5ef54fd416 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl); +/* Speculation control for seccomp enforced mitigation */ +void arch_seccomp_spec_mitigate(struct task_struct *task); #endif /* _LINUX_NOSPEC_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4bb8a5a5d68a..9a9203b15cde 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -216,18 +216,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } -/* - * If a given speculation mitigation is opt-in (prctl()-controlled), - * select it, by disabling speculation (enabling mitigation). - */ -static inline void spec_mitigate(struct task_struct *task, - unsigned long which) -{ - int state = arch_prctl_spec_ctrl_get(task, which); - - if (state > 0 && (state & PR_SPEC_PRCTL)) - arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); -} +void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode, @@ -243,7 +232,7 @@ static inline void seccomp_assign_mode(struct task_struct *task, smp_mb__before_atomic(); /* Assume default seccomp processes want spec flaw mitigation. */ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) - spec_mitigate(task, PR_SPEC_STORE_BYPASS); + arch_seccomp_spec_mitigate(task); set_tsk_thread_flag(task, TIF_SECCOMP); } From afc6bf9131efc36d4ae8a003e8597119a2190661 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:37:13 -0700 Subject: [PATCH 075/519] x86/speculation: Make "seccomp" the default mode for Speculative Store Bypass commit f21b53b20c754021935ea43364dbf53778eeba32 upstream Unless explicitly opted out of, anything running under seccomp will have SSB mitigations enabled. Choosing the "prctl" mode will disable this. [ tglx: Adjusted it to the new arch_seccomp_spec_mitigate() mechanism ] Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 26 ++++++++++++++-------- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 32 ++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 80202debbdbe..3fd53e193b7f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3647,19 +3647,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This parameter controls whether the Speculative Store Bypass optimization is used. - on - Unconditionally disable Speculative Store Bypass - off - Unconditionally enable Speculative Store Bypass - auto - Kernel detects whether the CPU model contains an - implementation of Speculative Store Bypass and - picks the most appropriate mitigation. - prctl - Control Speculative Store Bypass per thread - via prctl. Speculative Store Bypass is enabled - for a process by default. The state of the control - is inherited on fork. + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation. If the + CPU is not vulnerable, "off" is selected. If the + CPU is vulnerable the default mitigation is + architecture and Kconfig dependent. See below. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. + seccomp - Same as "prctl" above, but all seccomp threads + will disable SSB unless they explicitly opt out. Not specifying this option is equivalent to spec_store_bypass_disable=auto. + Default mitigations: + X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 155d955ab801..930c15941157 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -188,6 +188,7 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, }; extern char __indirect_thunk_start[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b005ef721dc7..6fd3fcf680bf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -414,22 +414,25 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_AUTO, SPEC_STORE_BYPASS_CMD_ON, SPEC_STORE_BYPASS_CMD_PRCTL, + SPEC_STORE_BYPASS_CMD_SECCOMP, }; static const char *ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", - [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", + [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", }; static const struct { const char *option; enum ssb_mitigation_cmd cmd; } ssb_mitigation_options[] = { - { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ - { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ - { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ - { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ }; static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) @@ -479,8 +482,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: - /* Choose prctl as the default mode */ - mode = SPEC_STORE_BYPASS_PRCTL; + case SPEC_STORE_BYPASS_CMD_SECCOMP: + /* + * Choose prctl+seccomp as the default mode if seccomp is + * enabled. + */ + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPEC_STORE_BYPASS_SECCOMP; + else + mode = SPEC_STORE_BYPASS_PRCTL; break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; @@ -528,12 +538,14 @@ static void ssb_select_mitigation() } #undef pr_fmt +#define pr_fmt(fmt) "Speculation prctl: " fmt static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { bool update; - if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; switch (ctrl) { @@ -581,7 +593,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, #ifdef CONFIG_SECCOMP void arch_seccomp_spec_mitigate(struct task_struct *task) { - ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -590,6 +603,7 @@ static int ssb_prctl_get(struct task_struct *task) switch (ssb_mode) { case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_SECCOMP: case SPEC_STORE_BYPASS_PRCTL: if (task_spec_ssb_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; From 6e2119e4b8767a6c3a415875ad09596ada00755c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:37:21 -0700 Subject: [PATCH 076/519] x86/bugs: Rename _RDS to _SSBD commit 9f65fb29374ee37856dbad847b4e121aab72b510 upstream Intel collateral will reference the SSB mitigation bit in IA32_SPEC_CTL[2] as SSBD (Speculative Store Bypass Disable). Hence changing it. It is unclear yet what the MSR_IA32_ARCH_CAPABILITIES (0x10a) Bit(4) name is going to be. Following the rename it would be SSBD_NO but that rolls out to Speculative Store Bypass Disable No. Also fixed the missing space in X86_FEATURE_AMD_SSBD. [ tglx: Fixup x86_amd_rds_enable() and rds_tif_to_amd_ls_cfg() as well ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman [ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ] Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 4 ++-- arch/x86/include/asm/msr-index.h | 10 ++++----- arch/x86/include/asm/spec-ctrl.h | 12 +++++----- arch/x86/include/asm/thread_info.h | 6 ++--- arch/x86/kernel/cpu/amd.c | 14 ++++++------ arch/x86/kernel/cpu/bugs.c | 36 +++++++++++++++--------------- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/process.c | 8 +++---- 9 files changed, 47 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b7cdd1c05132..97926be21a7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ +#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -299,7 +299,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ +#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 883cf0d6b1f9..2ea2ff1a81e8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -35,8 +35,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ -#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -58,10 +58,10 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_RDS_NO (1 << 4) /* +#define ARCH_CAP_SSBD_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass - * attack, so no Reduced Data Speculation control - * required. + * attack, so no Speculative Store Bypass + * control required. */ #define MSR_IA32_BBL_CR_CTL 0x00000119 diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 45ef00ad5105..dc21209790bf 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -17,20 +17,20 @@ extern void x86_spec_ctrl_restore_host(u64); /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; -extern u64 x86_amd_ls_cfg_rds_mask; +extern u64 x86_amd_ls_cfg_ssbd_mask; /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { - BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); - return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } -static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { - return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } extern void speculative_store_bypass_update(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 36a49b4ba4b5..a96e88b243ef 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,7 +92,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_RDS 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -117,7 +117,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_RDS (1 << TIF_RDS) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -149,7 +149,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 14e984916f2f..bd0edb2a21fc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -532,12 +532,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } /* * Try to cache the base value so further operations can - * avoid RMW. If that faults, do not enable RDS. + * avoid RMW. If that faults, do not enable SSBD. */ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { - setup_force_cpu_cap(X86_FEATURE_RDS); - setup_force_cpu_cap(X86_FEATURE_AMD_RDS); - x86_amd_ls_cfg_rds_mask = 1ULL << bit; + setup_force_cpu_cap(X86_FEATURE_SSBD); + setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } } @@ -816,9 +816,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { - set_cpu_cap(c, X86_FEATURE_RDS); - set_cpu_cap(c, X86_FEATURE_AMD_RDS); + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_AMD_SSBD); } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6fd3fcf680bf..812e92a49216 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -44,10 +44,10 @@ static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; /* * AMD specific MSR info for Speculative Store Bypass control. - * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). */ u64 x86_amd_ls_cfg_base; -u64 x86_amd_ls_cfg_rds_mask; +u64 x86_amd_ls_cfg_ssbd_mask; void __init check_bugs(void) { @@ -144,7 +144,7 @@ u64 x86_spec_ctrl_get_default(void) u64 msrval = x86_spec_ctrl_base; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); return msrval; } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); @@ -157,7 +157,7 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) return; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); @@ -172,18 +172,18 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) return; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, host); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); -static void x86_amd_rds_enable(void) +static void x86_amd_ssb_disable(void) { - u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_AMD_RDS)) + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } @@ -471,7 +471,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; enum ssb_mitigation_cmd cmd; - if (!boot_cpu_has(X86_FEATURE_RDS)) + if (!boot_cpu_has(X86_FEATURE_SSBD)) return mode; cmd = ssb_parse_cmdline(); @@ -505,7 +505,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. - * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation */ if (mode == SPEC_STORE_BYPASS_DISABLE) { @@ -516,12 +516,12 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: - x86_spec_ctrl_base |= SPEC_CTRL_RDS; - x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; - x86_spec_ctrl_set(SPEC_CTRL_RDS); + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; + x86_spec_ctrl_set(SPEC_CTRL_SSBD); break; case X86_VENDOR_AMD: - x86_amd_rds_enable(); + x86_amd_ssb_disable(); break; } } @@ -554,16 +554,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_RDS); + update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); break; default: return -ERANGE; @@ -633,7 +633,7 @@ void x86_spec_ctrl_setup_ap(void) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) - x86_amd_rds_enable(); + x86_amd_ssb_disable(); } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7405c8653f7a..6f3a5d74acc8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -867,7 +867,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_RDS_NO)) + !(ia32_cap & ARCH_CAP_SSBD_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ac25d1e5e8e8..a34e35731be4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -119,7 +119,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); - setup_clear_cpu_cap(X86_FEATURE_RDS); + setup_clear_cpu_cap(X86_FEATURE_SSBD); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9689e92e72dd..57d4ba250c6a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -203,11 +203,11 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn { u64 msr; - if (static_cpu_has(X86_FEATURE_AMD_RDS)) { - msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); } else { - msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); + msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } } @@ -246,7 +246,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOTSC) cr4_toggle_bits(X86_CR4_TSD); - if ((tifp ^ tifn) & _TIF_RDS) + if ((tifp ^ tifn) & _TIF_SSBD) __speculative_store_bypass_update(tifn); } From 765897c6486de605eae3f94f77f2c800c9a2a254 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:37:29 -0700 Subject: [PATCH 077/519] proc: Use underscores for SSBD in 'status' commit e96f46ee8587607a828f783daa6eb5b44d25004d upstream The style for the 'status' file is CamelCase or this. _. Fixes: fae1fa0fc ("proc: Provide details on speculation flaw mitigations") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 31414787c971..cb71cbae606d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -333,7 +333,7 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); #endif - seq_printf(m, "\nSpeculation Store Bypass:\t"); + seq_printf(m, "\nSpeculation_Store_Bypass:\t"); switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { case -EINVAL: seq_printf(m, "unknown"); From e5eea0486470acbe7aa20a0533543c47c942ec93 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 14 Jul 2018 02:37:37 -0700 Subject: [PATCH 078/519] Documentation/spec_ctrl: Do some minor cleanups commit dd0792699c4058e63c0715d9a7c2d40226fcdddc upstream Fix some typos, improve formulations, end sentences with a fullstop. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt index 1b3690d30943..32f3d55c54b7 100644 --- a/Documentation/spec_ctrl.txt +++ b/Documentation/spec_ctrl.txt @@ -2,13 +2,13 @@ Speculation Control =================== -Quite some CPUs have speculation related misfeatures which are in fact -vulnerabilites causing data leaks in various forms even accross privilege -domains. +Quite some CPUs have speculation-related misfeatures which are in +fact vulnerabilities causing data leaks in various forms even across +privilege domains. The kernel provides mitigation for such vulnerabilities in various -forms. Some of these mitigations are compile time configurable and some on -the kernel command line. +forms. Some of these mitigations are compile-time configurable and some +can be supplied on the kernel command line. There is also a class of mitigations which are very expensive, but they can be restricted to a certain set of processes or tasks in controlled @@ -32,18 +32,18 @@ the following meaning: Bit Define Description ==== ===================== =================================================== 0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL + PR_SET_SPECULATION_CTRL. 1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled + disabled. 2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled + enabled. 3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A subsequent prctl(..., PR_SPEC_ENABLE) will fail. ==== ===================== =================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. -If PR_SPEC_PRCTL is set, then the per task control of the mitigation is +If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation misfeature will fail. @@ -61,9 +61,9 @@ Common error codes Value Meaning ======= ================================================================= EINVAL The prctl is not implemented by the architecture or unused - prctl(2) arguments are not 0 + prctl(2) arguments are not 0. -ENODEV arg2 is selecting a not supported speculation misfeature +ENODEV arg2 is selecting a not supported speculation misfeature. ======= ================================================================= PR_SET_SPECULATION_CTRL error codes @@ -74,7 +74,7 @@ Value Meaning 0 Success ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor - PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE + PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE. ENXIO Control of the selected speculation misfeature is not possible. See PR_GET_SPECULATION_CTRL. From 631474e1cee0fbc0f346664aea5ee5b1c3600649 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sat, 14 Jul 2018 02:37:45 -0700 Subject: [PATCH 079/519] x86/bugs: Fix __ssb_select_mitigation() return type commit d66d8ff3d21667b41eddbe86b35ab411e40d8c5f upstream __ssb_select_mitigation() returns one of the members of enum ssb_mitigation, not ssb_mitigation_cmd; fix the prototype to reflect that. Fixes: 24f7fc83b9204 ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 812e92a49216..5b58b76254a2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -466,7 +466,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) return cmd; } -static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) +static enum ssb_mitigation __init __ssb_select_mitigation(void) { enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; enum ssb_mitigation_cmd cmd; From 103b28d8a271c1d650eb5b09bd7a53d8915b51d6 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sat, 14 Jul 2018 02:37:53 -0700 Subject: [PATCH 080/519] x86/bugs: Make cpu_show_common() static commit 7bb4d366cba992904bffa4820d24e70a3de93e76 upstream cpu_show_common() is not used outside of arch/x86/kernel/cpu/bugs.c, so make it static. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5b58b76254a2..512be6852395 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -638,7 +638,7 @@ void x86_spec_ctrl_setup_ap(void) #ifdef CONFIG_SYSFS -ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, +static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) From 95bef2217ece77c345e627eba9cd2e85ada8eeb2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:38:01 -0700 Subject: [PATCH 081/519] x86/bugs: Fix the parameters alignment and missing void commit ffed645e3be0e32f8e9ab068d257aee8d0fe8eec upstream Fixes: 7bb4d366c ("x86/bugs: Make cpu_show_common() static") Fixes: 24f7fc83b ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 512be6852395..84de0fc30ea9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -529,7 +529,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) return mode; } -static void ssb_select_mitigation() +static void ssb_select_mitigation(void) { ssb_mode = __ssb_select_mitigation(); @@ -639,7 +639,7 @@ void x86_spec_ctrl_setup_ap(void) #ifdef CONFIG_SYSFS static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, - char *buf, unsigned int bug) + char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); From 714f18858ceda6f2b8335686f1f019560fe89283 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Sat, 14 Jul 2018 02:38:08 -0700 Subject: [PATCH 082/519] x86/cpu: Make alternative_msr_write work for 32-bit code commit 5f2b745f5e1304f438f9b2cd03ebc8120b6e0d3b upstream Cast val and (val >> 32) to (u32), so that they fit in a general-purpose register in both 32-bit and 64-bit code. [ tglx: Made it u32 instead of uintptr_t ] Fixes: c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") Signed-off-by: Jim Mattson Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 930c15941157..640c11b25913 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,8 +219,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) : : "c" (msr), - "a" (val), - "d" (val >> 32), + "a" ((u32)val), + "d" ((u32)(val >> 32)), [feature] "i" (feature) : "memory"); } From 4f4a2c70cf2ecd17ef3899c754fee30caa343286 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 14 Jul 2018 02:38:17 -0700 Subject: [PATCH 083/519] x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e7c587da125291db39ddf1f49b18e5970adbac17 upstream Intel and AMD have different CPUID bits hence for those use synthetic bits which get set on the respective vendor's in init_speculation_control(). So that debacles like what the commit message of c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") talks about don't happen anymore. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Jörg Otte Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Link: https://lkml.kernel.org/r/20180504161815.GG9257@pd.tnic Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman [ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ] Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 12 ++++++++---- arch/x86/kernel/cpu/common.c | 14 ++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 97926be21a7f..9f64d10adeba 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,10 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -256,9 +259,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -293,6 +296,7 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6f3a5d74acc8..f2b579ff104c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -683,17 +683,23 @@ static void init_speculation_control(struct cpuinfo_x86 *c) * and they also have a different bit for STIBP support. Also, * a hypervisor might have set the individual AMD bits even on * Intel CPUs, for finer-grained selection of what's available. - * - * We use the AMD bits in 0x8000_0008 EBX as the generic hardware - * features, which are visible in /proc/cpuinfo and used by the - * kernel. So set those accordingly from the Intel bits. */ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) + set_cpu_cap(c, X86_FEATURE_IBRS); + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); } void get_cpu_cap(struct cpuinfo_x86 *c) From e4bb3382cbe9173e7f6e3a13fd1cb39c3a72671f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:38:25 -0700 Subject: [PATCH 084/519] x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration from IBRS commit 7eb8956a7fec3c1f0abc2a5517dada99ccc8a961 upstream The availability of the SPEC_CTRL MSR is enumerated by a CPUID bit on Intel and implied by IBRS or STIBP support on AMD. That's just confusing and in case an AMD CPU has IBRS not supported because the underlying problem has been fixed but has another bit valid in the SPEC_CTRL MSR, the thing falls apart. Add a synthetic feature bit X86_FEATURE_MSR_SPEC_CTRL to denote the availability on both Intel and AMD. While at it replace the boot_cpu_has() checks with static_cpu_has() where possible. This prevents late microcode loading from exposing SPEC_CTRL, but late loading is already very limited as it does not reevaluate the mitigation options and other bits and pieces. Having static_cpu_has() is the simplest and least fragile solution. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ arch/x86/kernel/cpu/bugs.c | 18 +++++++++++------- arch/x86/kernel/cpu/common.c | 9 +++++++-- arch/x86/kernel/cpu/intel.c | 1 + 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9f64d10adeba..dd04cd739553 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,6 +198,9 @@ #define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 84de0fc30ea9..e23e2899d783 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,7 +63,7 @@ void __init check_bugs(void) * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD * init code as it is not enumerated and depends on the family. */ - if (boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); /* Select the proper spectre mitigation before patching alternatives */ @@ -143,7 +143,7 @@ u64 x86_spec_ctrl_get_default(void) { u64 msrval = x86_spec_ctrl_base; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); return msrval; } @@ -153,10 +153,12 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) { u64 host = x86_spec_ctrl_base; - if (!boot_cpu_has(X86_FEATURE_IBRS)) + /* Is MSR_SPEC_CTRL implemented ? */ + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + /* Intel controls SSB in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -168,10 +170,12 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) { u64 host = x86_spec_ctrl_base; - if (!boot_cpu_has(X86_FEATURE_IBRS)) + /* Is MSR_SPEC_CTRL implemented ? */ + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + /* Intel controls SSB in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -629,7 +633,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { - if (boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f2b579ff104c..1f70ff15eb62 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -687,19 +687,24 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_AMD_IBRS)) + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } if (cpu_has(c, X86_FEATURE_AMD_IBPB)) set_cpu_cap(c, X86_FEATURE_IBPB); - if (cpu_has(c, X86_FEATURE_AMD_STIBP)) + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a34e35731be4..9a84e75cbec5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -118,6 +118,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); } From 11a0b92f6d57853550f927fe91190b745a5ab945 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:38:33 -0700 Subject: [PATCH 085/519] x86/cpufeatures: Disentangle SSBD enumeration commit 52817587e706686fcdb27f14c1b000c92f266c96 upstream The SSBD enumeration is similarly to the other bits magically shared between Intel and AMD though the mechanisms are different. Make X86_FEATURE_SSBD synthetic and set it depending on the vendor specific features or family dependent setup. Change the Intel bit to X86_FEATURE_SPEC_CTRL_SSBD to denote that SSBD is controlled via MSR_SPEC_CTRL and fix up the usage sites. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 6 ++++-- arch/x86/kernel/cpu/amd.c | 7 +------ arch/x86/kernel/cpu/bugs.c | 10 +++++----- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/process.c | 2 +- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dd04cd739553..7dd1bba49172 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -200,6 +200,7 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ @@ -207,7 +208,8 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ + #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ @@ -306,7 +308,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bd0edb2a21fc..a97fd6730f12 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,8 +535,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) * avoid RMW. If that faults, do not enable SSBD. */ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); setup_force_cpu_cap(X86_FEATURE_SSBD); - setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } @@ -815,11 +815,6 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - - if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { - set_cpu_cap(c, X86_FEATURE_SSBD); - set_cpu_cap(c, X86_FEATURE_AMD_SSBD); - } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e23e2899d783..9be729267794 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -157,8 +157,8 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - /* Intel controls SSB in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -174,8 +174,8 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - /* Intel controls SSB in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -187,7 +187,7 @@ static void x86_amd_ssb_disable(void) { u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1f70ff15eb62..10977236c6eb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -693,6 +693,9 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9a84e75cbec5..4dce22d3cb06 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -121,6 +121,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d4ba250c6a..8cefbd2bb348 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -203,7 +203,7 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn { u64 msr; - if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); } else { From 4ba461d426490b6ed7e8298c4d3b7a13aa5d2686 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 14 Jul 2018 02:38:41 -0700 Subject: [PATCH 086/519] x86/cpu/AMD: Fix erratum 1076 (CPB bit) commit f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 upstream CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sherry Hurwitz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a97fd6730f12..87f4a0d73c5e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -713,6 +713,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c) } } +static void init_amd_zn(struct cpuinfo_x86 *c) +{ + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects + * all up to and including B1. + */ + if (c->x86_model <= 1 && c->x86_mask <= 1) + set_cpu_cap(c, X86_FEATURE_CPB); +} + static void init_amd(struct cpuinfo_x86 *c) { u32 dummy; @@ -743,6 +753,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x17: init_amd_zn(c); break; } /* Enable workaround for FXSAVE leak */ From 21757fc8bafd50ce477fff2bcec6faec27c5548d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:38:50 -0700 Subject: [PATCH 087/519] x86/cpufeatures: Add FEATURE_ZEN commit d1035d971829dcf80e8686ccde26f94b0a069472 upstream Add a ZEN feature bit so family-dependent static_cpu_has() optimizations can be built for ZEN. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/amd.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 7dd1bba49172..8ae91326cc47 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,8 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 87f4a0d73c5e..9f6151884249 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -715,6 +715,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + set_cpu_cap(c, X86_FEATURE_ZEN); /* * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. From ea8efcd4415f70766acb4bb9553fad855eea48e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:38:58 -0700 Subject: [PATCH 088/519] x86/speculation: Handle HT correctly on AMD commit 1f50ddb4f4189243c05926b842dc1a0332195f31 upstream The AMD64_LS_CFG MSR is a per core MSR on Family 17H CPUs. That means when hyperthreading is enabled the SSBD bit toggle needs to take both cores into account. Otherwise the following situation can happen: CPU0 CPU1 disable SSB disable SSB enable SSB <- Enables it for the Core, i.e. for CPU0 as well So after the SSB enable on CPU1 the task on CPU0 runs with SSB enabled again. On Intel the SSBD control is per core as well, but the synchronization logic is implemented behind the per thread SPEC_CTRL MSR. It works like this: CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL i.e. if one of the threads enables a mitigation then this affects both and the mitigation is only disabled in the core when both threads disabled it. Add the necessary synchronization logic for AMD family 17H. Unfortunately that requires a spinlock to serialize the access to the MSR, but the locks are only shared between siblings. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 ++ arch/x86/kernel/process.c | 131 ++++++++++++++++++++++++++++--- arch/x86/kernel/smpboot.c | 5 ++ 3 files changed, 133 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index dc21209790bf..0cb49c4564b0 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -33,6 +33,12 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + extern void speculative_store_bypass_update(void); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8cefbd2bb348..0842869db312 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -199,22 +199,135 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +#ifdef CONFIG_SMP + +struct ssb_state { + struct ssb_state *shared_state; + raw_spinlock_t lock; + unsigned int disable_state; + unsigned long local_state; +}; + +#define LSTATE_SSB 0 + +static DEFINE_PER_CPU(struct ssb_state, ssb_state); + +void speculative_store_bypass_ht_init(void) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + st->local_state = 0; + + /* + * Shared state setup happens once on the first bringup + * of the CPU. It's not destroyed on CPU hotunplug. + */ + if (st->shared_state) + return; + + raw_spin_lock_init(&st->lock); + + /* + * Go over HT siblings and check whether one of them has set up the + * shared state pointer already. + */ + for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (!per_cpu(ssb_state, cpu).shared_state) + continue; + + /* Link it to the state of the sibling: */ + st->shared_state = per_cpu(ssb_state, cpu).shared_state; + return; + } + + /* + * First HT sibling to come up on the core. Link shared state of + * the first HT sibling to itself. The siblings on the same core + * which come up later will see the shared state pointer and link + * themself to the state of this CPU. + */ + st->shared_state = st; +} + +/* + * Logic is: First HT sibling enables SSBD for both siblings in the core + * and last sibling to disable it, disables it for the whole core. This how + * MSR_SPEC_CTRL works in "hardware": + * + * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + */ +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + u64 msr = x86_amd_ls_cfg_base; + + if (!static_cpu_has(X86_FEATURE_ZEN)) { + msr |= ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + return; + } + + if (tifn & _TIF_SSBD) { + /* + * Since this can race with prctl(), block reentry on the + * same CPU. + */ + if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) + return; + + msr |= x86_amd_ls_cfg_ssbd_mask; + + raw_spin_lock(&st->shared_state->lock); + /* First sibling enables SSBD: */ + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + st->shared_state->disable_state++; + raw_spin_unlock(&st->shared_state->lock); + } else { + if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) + return; + + raw_spin_lock(&st->shared_state->lock); + st->shared_state->disable_state--; + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + raw_spin_unlock(&st->shared_state->lock); + } +} +#else +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + + wrmsrl(MSR_AMD64_LS_CFG, msr); +} +#endif + +static __always_inline void intel_set_ssb_state(unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + + wrmsrl(MSR_IA32_SPEC_CTRL, msr); +} + static __always_inline void __speculative_store_bypass_update(unsigned long tifn) { - u64 msr; - - if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { - msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); - wrmsrl(MSR_AMD64_LS_CFG, msr); - } else { - msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - wrmsrl(MSR_IA32_SPEC_CTRL, msr); - } + if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); } void speculative_store_bypass_update(void) { + preempt_disable(); __speculative_store_bypass_update(current_thread_info()->flags); + preempt_enable(); } void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1f7aefc7b0b4..c017f1c71560 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -75,6 +75,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -217,6 +218,8 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + speculative_store_bypass_ht_init(); + /* * Lock vector_lock and initialize the vectors on this cpu * before setting the cpu online. We must set it online with @@ -1209,6 +1212,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_mtrr_aps_delayed_init(); smp_quirk_init_udelay(); + + speculative_store_bypass_ht_init(); } void arch_enable_nonboot_cpus_begin(void) From e13a6f0955bb5ee6daca1f08027d6561d0830daf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:39:06 -0700 Subject: [PATCH 089/519] x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL commit ccbcd2674472a978b48c91c1fbfb66c0ff959f24 upstream AMD is proposing a VIRT_SPEC_CTRL MSR to handle the Speculative Store Bypass Disable via MSR_AMD64_LS_CFG so that guests do not have to care about the bit position of the SSBD bit and thus facilitate migration. Also, the sibling coordination on Family 17H CPUs can only be done on the host. Extend x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() with an extra argument for the VIRT_SPEC_CTRL MSR. Hand in 0 from VMX and in SVM add a new virt_spec_ctrl member to the CPU data structure which is going to be used in later patches for the actual implementation. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman [ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ] Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 9 ++++++--- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 0cb49c4564b0..6e2874049afd 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -10,10 +10,13 @@ * the guest has, while on VMEXIT we restore the host view. This * would be easier if SPEC_CTRL were architecturally maskable or * shadowable for guests but this is not (currently) the case. - * Takes the guest view of SPEC_CTRL MSR as a parameter. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_spec_ctrl_set_guest(u64); -extern void x86_spec_ctrl_restore_host(u64); +extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, + u64 guest_virt_spec_ctrl); +extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, + u64 guest_virt_spec_ctrl); /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9be729267794..a1c98fda0878 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -149,7 +149,15 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) { u64 host = x86_spec_ctrl_base; @@ -166,7 +174,15 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) { u64 host = x86_spec_ctrl_base; From ecfe9bf30e4b7cd13f3b28f40a587a932b5cb457 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 14 Jul 2018 02:39:14 -0700 Subject: [PATCH 090/519] x86/speculation: Add virtualized speculative store bypass disable support commit 11fb0683493b2da112cd64c9dada221b52463bf7 upstream Some AMD processors only support a non-architectural means of enabling speculative store bypass disable (SSBD). To allow a simplified view of this to a guest, an architectural definition has been created through a new CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f. With this, a hypervisor can virtualize the existence of this definition and provide an architectural method for using SSBD to a guest. Add the new CPUID feature, the new MSR and update the existing SSBD support to use this MSR when present. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 4 +++- arch/x86/kernel/process.c | 13 ++++++++++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8ae91326cc47..f4b175db70f4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -269,6 +269,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2ea2ff1a81e8..22f2dd50d2d9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -328,6 +328,8 @@ #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a1c98fda0878..50ab206a09b3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -203,7 +203,9 @@ static void x86_amd_ssb_disable(void) { u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); + else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0842869db312..eab9d0cfed70 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -308,6 +308,15 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn) } #endif +static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) +{ + /* + * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, + * so ssbd_tif_to_spec_ctrl() just works. + */ + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); +} + static __always_inline void intel_set_ssb_state(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); @@ -317,7 +326,9 @@ static __always_inline void intel_set_ssb_state(unsigned long tifn) static __always_inline void __speculative_store_bypass_update(unsigned long tifn) { - if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) + amd_set_ssb_virt_state(tifn); + else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else intel_set_ssb_state(tifn); From 3d60492cea89c0a0fb06c73ee49cc14c55f527dd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:39:22 -0700 Subject: [PATCH 091/519] x86/speculation: Rework speculative_store_bypass_update() commit 0270be3e34efb05a88bc4c422572ece038ef3608 upstream The upcoming support for the virtual SPEC_CTRL MSR on AMD needs to reuse speculative_store_bypass_update() to avoid code duplication. Add an argument for supplying a thread info (TIF) value and create a wrapper speculative_store_bypass_update_current() which is used at the existing call site. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 7 ++++++- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/process.c | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 6e2874049afd..82b6c5a0d61e 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -42,6 +42,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(void); +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 50ab206a09b3..1b29be9211af 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -596,7 +596,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update(); + speculative_store_bypass_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index eab9d0cfed70..e18c8798c3a2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -334,10 +334,10 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn intel_set_ssb_state(tifn); } -void speculative_store_bypass_update(void) +void speculative_store_bypass_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(current_thread_info()->flags); + __speculative_store_bypass_update(tif); preempt_enable(); } From d5aec90670c378b6d05e5f904b1a8c8cffb17eef Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 14 Jul 2018 02:39:30 -0700 Subject: [PATCH 092/519] x86/bugs: Unify x86_spec_ctrl_{set_guest, restore_host} commit cc69b34989210f067b2c51d5539b5f96ebcc3a01 upstream Function bodies are very similar and are going to grow more almost identical code. Add a bool arg to determine whether SPEC_CTRL is being set for the guest or restored to the host. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 33 +++++++++++++++--- arch/x86/kernel/cpu/bugs.c | 58 ++++++++------------------------ 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 82b6c5a0d61e..9cecbe5e57ee 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -13,10 +13,35 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, - u64 guest_virt_spec_ctrl); -extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, - u64 guest_virt_spec_ctrl); +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1b29be9211af..208d44c5cc50 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -149,55 +149,25 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -/** - * x86_spec_ctrl_set_guest - Set speculation control registers for the guest - * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL - * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL - * (may get translated to MSR_AMD64_LS_CFG bits) - * - * Avoids writing to the MSR if the content/bits are the same - */ -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void +x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 host = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); + u64 msr, host = x86_spec_ctrl_base; /* Is MSR_SPEC_CTRL implemented ? */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + host |= ssbd_tif_to_spec_ctrl(ti->flags); - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); + if (host != guest_spec_ctrl) { + msr = setguest ? guest_spec_ctrl : host; + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } + } } -EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); - -/** - * x86_spec_ctrl_restore_host - Restore host speculation control registers - * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL - * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL - * (may get translated to MSR_AMD64_LS_CFG bits) - * - * Avoids writing to the MSR if the content/bits are the same - */ -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) -{ - u64 host = x86_spec_ctrl_base; - - /* Is MSR_SPEC_CTRL implemented ? */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, host); -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); +EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); static void x86_amd_ssb_disable(void) { From 9ed7ee52e4e06364f47d6a6e898610bae5f04e93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:39:38 -0700 Subject: [PATCH 093/519] x86/bugs: Expose x86_spec_ctrl_base directly commit fa8ac4988249c38476f6ad678a4848a736373403 upstream x86_spec_ctrl_base is the system wide default value for the SPEC_CTRL MSR. x86_spec_ctrl_get_default() returns x86_spec_ctrl_base and was intended to prevent modification to that variable. Though the variable is read only after init and globaly visible already. Remove the function and export the variable instead. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 16 +++++----------- arch/x86/include/asm/spec-ctrl.h | 3 --- arch/x86/kernel/cpu/bugs.c | 11 +---------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 640c11b25913..2757c79754e1 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -172,16 +172,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; -/* - * The Intel specification for the SPEC_CTRL MSR requires that we - * preserve any already set reserved bits at boot time (e.g. for - * future additions that this kernel is not currently aware of). - * We then set any additional mitigation bits that we want - * ourselves and always use this as the base for SPEC_CTRL. - * We also use this when handling guest entry/exit as below. - */ extern void x86_spec_ctrl_set(u64); -extern u64 x86_spec_ctrl_get_default(void); /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { @@ -232,6 +223,9 @@ static inline void indirect_branch_prediction_barrier(void) alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. @@ -240,7 +234,7 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ \ preempt_disable(); \ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ @@ -249,7 +243,7 @@ do { \ #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_get_default(); \ + u64 val = x86_spec_ctrl_base; \ \ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 9cecbe5e57ee..763d49710329 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -47,9 +47,6 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_ssbd_mask; -/* The Intel SPEC CTRL MSR base value cache */ -extern u64 x86_spec_ctrl_base; - static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 208d44c5cc50..5391df5826c9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -35,6 +35,7 @@ static void __init ssb_select_mitigation(void); * writes to SPEC_CTRL contain whatever reserved bits have been set. */ u64 x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); /* * The vendor and possibly platform specific bits which can be modified in @@ -139,16 +140,6 @@ void x86_spec_ctrl_set(u64 val) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); -u64 x86_spec_ctrl_get_default(void) -{ - u64 msrval = x86_spec_ctrl_base; - - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) - msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - return msrval; -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { From 90cfa767bc12a9931e5e45ed275b069d5b35b52e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:39:46 -0700 Subject: [PATCH 094/519] x86/bugs: Remove x86_spec_ctrl_set() commit 4b59bdb569453a60b752b274ca61f009e37f4dae upstream x86_spec_ctrl_set() is only used in bugs.c and the extra mask checks there provide no real value as both call sites can just write x86_spec_ctrl_base to MSR_SPEC_CTRL. x86_spec_ctrl_base is valid and does not need any extra masking or checking. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 -- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2757c79754e1..b4c74c24c890 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -172,8 +172,6 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; -extern void x86_spec_ctrl_set(u64); - /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5391df5826c9..05eed68a32f4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -131,15 +131,6 @@ static const char *spectre_v2_strings[] = { static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -void x86_spec_ctrl_set(u64 val) -{ - if (val & x86_spec_ctrl_mask) - WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); - else - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -501,7 +492,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; - x86_spec_ctrl_set(SPEC_CTRL_SSBD); + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; case X86_VENDOR_AMD: x86_amd_ssb_disable(); @@ -613,7 +604,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); From 80d7439fb0c446d006599b6347efd255a86a93ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:39:55 -0700 Subject: [PATCH 095/519] x86/bugs: Rework spec_ctrl base and mask logic commit be6fcb5478e95bb1c91f489121238deb3abca46a upstream x86_spec_ctrL_mask is intended to mask out bits from a MSR_SPEC_CTRL value which are not to be modified. However the implementation is not really used and the bitmask was inverted to make a check easier, which was removed in "x86/bugs: Remove x86_spec_ctrl_set()" Aside of that it is missing the STIBP bit if it is supported by the platform, so if the mask would be used in x86_virt_spec_ctrl() then it would prevent a guest from setting STIBP. Add the STIBP bit if supported and use the mask in x86_virt_spec_ctrl() to sanitize the value which is supplied by the guest. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 05eed68a32f4..af11a02819bc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. */ -static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; +static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS; /* * AMD specific MSR info for Speculative Store Bypass control. @@ -67,6 +67,10 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* Allow STIBP in MSR_SPEC_CTRL if supported */ + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); @@ -134,18 +138,26 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { + u64 msrval, guestval, hostval = x86_spec_ctrl_base; struct thread_info *ti = current_thread_info(); - u64 msr, host = x86_spec_ctrl_base; /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* + * Restrict guest_spec_ctrl to supported values. Clear the + * modifiable bits in the host base value and or the + * modifiable bits from the guest value. + */ + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + /* SSBD controlled in MSR_SPEC_CTRL */ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(ti->flags); + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - if (host != guest_spec_ctrl) { - msr = setguest ? guest_spec_ctrl : host; - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); } } } @@ -491,7 +503,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; case X86_VENDOR_AMD: From 48805280d05c968e0883e8debf5e33f40f8e56c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:40:03 -0700 Subject: [PATCH 096/519] x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG commit 47c61b3955cf712cadfc25635bf9bc174af030ea upstream Add the necessary logic for supporting the emulated VIRT_SPEC_CTRL MSR to x86_virt_spec_ctrl(). If either X86_FEATURE_LS_CFG_SSBD or X86_FEATURE_VIRT_SPEC_CTRL is set then use the new guest_virt_spec_ctrl argument to check whether the state must be modified on the host. The update reuses speculative_store_bypass_update() so the ZEN-specific sibling coordination can be reused. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 763d49710329..ae7c2c5cd7f0 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,6 +53,12 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index af11a02819bc..12a8867071f3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -160,6 +160,36 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) wrmsrl(MSR_IA32_SPEC_CTRL, msrval); } } + + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. + */ + if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !static_cpu_has(X86_FEATURE_VIRT_SSBD)) + return; + + /* + * If the host has SSBD mitigation enabled, force it in the host's + * virtual MSR value. If its not permanently enabled, evaluate + * current's TIF_SSBD thread flag. + */ + if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) + hostval = SPEC_CTRL_SSBD; + else + hostval = ssbd_tif_to_spec_ctrl(ti->flags); + + /* Sanitize the guest value */ + guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; + + if (hostval != guestval) { + unsigned long tif; + + tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + + speculative_store_bypass_update(tif); + } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); From cadb98135daf474648d646db5625e9c663b94a3d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:40:10 -0700 Subject: [PATCH 097/519] x86/bugs: Rename SSBD_NO to SSB_NO commit 240da953fcc6a9008c92fae5b1f727ee5ed167ab upstream The "336996 Speculative Execution Side Channel Mitigations" from May defines this as SSB_NO, hence lets sync-up. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 22f2dd50d2d9..caa00191e565 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -58,7 +58,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SSBD_NO (1 << 4) /* +#define ARCH_CAP_SSB_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass * attack, so no Speculative Store Bypass * control required. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 10977236c6eb..9ad38ad194ac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,7 +881,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSBD_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) From 399a9d0cc466b0100feee5c6e9de7e6378b18fab Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 21 Jun 2018 10:43:31 +0200 Subject: [PATCH 098/519] x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths commit 74899d92e66663dc7671a8017b3146dcd4735f3b upstream. Commit: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD") ... added speculative_store_bypass_ht_init() to the per-CPU initialization sequence. speculative_store_bypass_ht_init() needs to be called on each CPU for PV guests, too. Reported-by: Brian Woods Tested-by: Brian Woods Signed-off-by: Juergen Gross Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: xen-devel@lists.xenproject.org Fixes: 1f50ddb4f4189243c05926b842dc1a0332195f31 ("x86/speculation: Handle HT correctly on AMD") Link: https://lore.kernel.org/lkml/20180621084331.21228-1-jgross@suse.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3f4ebf0261f2..29e50d1229bc 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -87,6 +88,8 @@ static void cpu_bringup(void) cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); + speculative_store_bypass_ht_init(); + xen_setup_cpu_clockevents(); notify_cpu_starting(cpu); @@ -357,6 +360,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); + speculative_store_bypass_ht_init(); + xen_pmu_init(0); if (xen_smp_intr_init(0)) From 42a8fe474e0c3e9babad09b4d3e882d7a0f09c76 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:39 -0800 Subject: [PATCH 099/519] x86/cpu: Re-apply forced caps every time CPU caps are re-read commit 60d3450167433f2d099ce2869dc52dd9e7dc9b29 upstream. Calling get_cpu_cap() will reset a bunch of CPU features. This will cause the system to lose track of force-set and force-cleared features in the words that are reset until the end of CPU initialization. This can cause X86_FEATURE_FPU, for example, to change back and forth during boot and potentially confuse CPU setup. To minimize the chance of confusion, re-apply forced caps every time get_cpu_cap() is called. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/c817eb373d2c67c2c81413a70fc9b845fa34a37e.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9ad38ad194ac..3d21b28f9826 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -803,6 +803,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + + /* + * Clear/Set all flags overridden by options, after probe. + * This needs to happen each time we re-probe, which may happen + * several times during CPU initialization. + */ + apply_forced_caps(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) From c9ae49135d83243f12b5a66044302d4a17e0dcfe Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 12 Apr 2018 19:11:58 +0100 Subject: [PATCH 100/519] block: do not use interruptible wait anywhere commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream. When blk_queue_enter() waits for a queue to unfreeze, or unset the PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI device is resumed asynchronously, i.e. after un-freezing userspace tasks. So that commit exposed the bug as a regression in v4.15. A mysterious SIGBUS (or -EIO) sometimes happened during the time the device was being resumed. Most frequently, there was no kernel log message, and we saw Xorg or Xwayland killed by SIGBUS.[1] [1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 Without this fix, I get an IO error in this test: # dd if=/dev/sda of=/dev/null iflag=direct & \ while killall -SIGUSR1 dd; do sleep 0.1; done & \ echo mem > /sys/power/state ; \ sleep 5; killall dd # stop after 5 seconds The interruptible wait was added to blk_queue_enter in commit 3ef28e83ab15 ("block: generic request_queue reference counting"). Before then, the interruptible wait was only in blk-mq, but I don't think it could ever have been correct. Reviewed-by: Bart Van Assche Cc: stable@vger.kernel.org Signed-off-by: Alan Jenkins Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f5f1a55703ae..50d77c90070d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -651,21 +651,17 @@ EXPORT_SYMBOL(blk_alloc_queue); int blk_queue_enter(struct request_queue *q, gfp_t gfp) { while (true) { - int ret; - if (percpu_ref_tryget_live(&q->q_usage_counter)) return 0; if (!gfpflags_allow_blocking(gfp)) return -EBUSY; - ret = wait_event_interruptible(q->mq_freeze_wq, - !atomic_read(&q->mq_freeze_depth) || - blk_queue_dying(q)); + wait_event(q->mq_freeze_wq, + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; - if (ret) - return ret; } } From 470ee7ab7776085fe5573788df2dea8140d7a0c1 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 29 Feb 2016 21:46:07 +0100 Subject: [PATCH 101/519] clk: tegra: Fix PLL_U post divider and initial rate on Tegra30 commit 797097301860c64b63346d068ba4fe4992bd5021 upstream. The post divider value in the frequency table is wrong as it would lead to the PLL producing an output rate of 960 MHz instead of the desired 480 MHz. This wasn't a problem as nothing used the table to actually initialize the PLL rate, but the bootloader configuration was used unaltered. If the bootloader does not set up the PLL it will fail to come when used under Linux. To fix this don't rely on the bootloader, but set the correct rate in the clock driver. Signed-off-by: Lucas Stach Signed-off-by: Thierry Reding [jonathanh@nvidia.com: Back-ported to stable v4.4.y] Signed-off-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- drivers/clk/tegra/clk-tegra30.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 8c41c6fcb9ee..acf83569f86f 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -333,11 +333,11 @@ static struct pdiv_map pllu_p[] = { }; static struct tegra_clk_pll_freq_table pll_u_freq_table[] = { - { 12000000, 480000000, 960, 12, 0, 12}, - { 13000000, 480000000, 960, 13, 0, 12}, - { 16800000, 480000000, 400, 7, 0, 5}, - { 19200000, 480000000, 200, 4, 0, 3}, - { 26000000, 480000000, 960, 26, 0, 12}, + { 12000000, 480000000, 960, 12, 2, 12 }, + { 13000000, 480000000, 960, 13, 2, 12 }, + { 16800000, 480000000, 400, 7, 2, 5 }, + { 19200000, 480000000, 200, 4, 2, 3 }, + { 26000000, 480000000, 960, 26, 2, 12 }, { 0, 0, 0, 0, 0, 0 }, }; @@ -1372,6 +1372,7 @@ static struct tegra_clk_init_table init_table[] __initdata = { {TEGRA30_CLK_GR2D, TEGRA30_CLK_PLL_C, 300000000, 0}, {TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 300000000, 0}, {TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 300000000, 0}, + { TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 480000000, 0 }, {TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0}, /* This MUST be the last entry. */ }; From 1ee52929e64c4bd185884ebc22d437ff93f97e3a Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 14 Jun 2016 10:12:13 +0200 Subject: [PATCH 102/519] ubi: Introduce vol_ignored() commit 243a4f8126fcf7facb04b324dbb7c85d10b11ce9 upstream. This makes the logic more easy to follow. Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/attach.c | 24 ++++++++++++++++++------ drivers/mtd/ubi/ubi.h | 15 +++++++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index c1aaf0336cf2..abd6137a3b4d 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -803,6 +803,20 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, return err; } +static bool vol_ignored(int vol_id) +{ + switch (vol_id) { + case UBI_LAYOUT_VOLUME_ID: + return true; + } + +#ifdef CONFIG_MTD_UBI_FASTMAP + return ubi_is_fm_vol(vol_id); +#else + return false; +#endif +} + /** * scan_peb - scan and process UBI headers of a PEB. * @ubi: UBI device description object @@ -995,17 +1009,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, *vid = vol_id; if (sqnum) *sqnum = be64_to_cpu(vidh->sqnum); - if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) { int lnum = be32_to_cpu(vidh->lnum); /* Unsupported internal volume */ switch (vidh->compat) { case UBI_COMPAT_DELETE: - if (vol_id != UBI_FM_SB_VOLUME_ID - && vol_id != UBI_FM_DATA_VOLUME_ID) { - ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", - vol_id, lnum); - } + ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", + vol_id, lnum); + err = add_to_list(ai, pnum, vol_id, lnum, ec, 1, &ai->erase); if (err) diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index de1ea2e4c37d..086ff56922b5 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -1101,4 +1101,19 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx) return idx; } +/** + * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume. + * @vol_id: volume ID + */ +static inline bool ubi_is_fm_vol(int vol_id) +{ + switch (vol_id) { + case UBI_FM_SB_VOLUME_ID: + case UBI_FM_DATA_VOLUME_ID: + return true; + } + + return false; +} + #endif /* !__UBI_UBI_H__ */ From 6fdca47fcc1a26b770ce1eb1a440ea06f8d804c5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 14 Jun 2016 10:12:15 +0200 Subject: [PATCH 103/519] ubi: Rework Fastmap attach base code commit fdf10ed710c0aa177e8dfcd84e65e4e5e8e0956b upstream. Introduce a new list to the UBI attach information object to be able to deal better with old and corrupted Fastmap eraseblocks. Also move more Fastmap specific code into fastmap.c. Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/attach.c | 99 +++++++++++++++++++++++++++------------ drivers/mtd/ubi/fastmap.c | 36 ++++++++++++-- drivers/mtd/ubi/ubi.h | 28 ++++++++++- drivers/mtd/ubi/wl.c | 47 ++++++++++++++----- 4 files changed, 165 insertions(+), 45 deletions(-) diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index abd6137a3b4d..68ff57435c89 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -174,6 +174,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec) return 0; } +/** + * add_fastmap - add a Fastmap related physical eraseblock. + * @ai: attaching information + * @pnum: physical eraseblock number the VID header came from + * @vid_hdr: the volume identifier header + * @ec: erase counter of the physical eraseblock + * + * This function allocates a 'struct ubi_ainf_peb' object for a Fastamp + * physical eraseblock @pnum and adds it to the 'fastmap' list. + * Such blocks can be Fastmap super and data blocks from both the most + * recent Fastmap we're attaching from or from old Fastmaps which will + * be erased. + */ +static int add_fastmap(struct ubi_attach_info *ai, int pnum, + struct ubi_vid_hdr *vid_hdr, int ec) +{ + struct ubi_ainf_peb *aeb; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->vol_id = be32_to_cpu(vidh->vol_id); + aeb->sqnum = be64_to_cpu(vidh->sqnum); + aeb->ec = ec; + list_add(&aeb->u.list, &ai->fastmap); + + dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum, + aeb->vol_id, aeb->sqnum); + + return 0; +} + /** * validate_vid_hdr - check volume identifier header. * @ubi: UBI device description object @@ -822,18 +856,15 @@ static bool vol_ignored(int vol_id) * @ubi: UBI device description object * @ai: attaching information * @pnum: the physical eraseblock number - * @vid: The volume ID of the found volume will be stored in this pointer - * @sqnum: The sqnum of the found volume will be stored in this pointer * * This function reads UBI headers of PEB @pnum, checks them, and adds * information about this PEB to the corresponding list or RB-tree in the * "attaching info" structure. Returns zero if the physical eraseblock was * successfully handled and a negative error code in case of failure. */ -static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, - int pnum, int *vid, unsigned long long *sqnum) +static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum) { - long long uninitialized_var(ec); + long long ec; int err, bitflips = 0, vol_id = -1, ec_err = 0; dbg_bld("scan PEB %d", pnum); @@ -1005,10 +1036,6 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, } vol_id = be32_to_cpu(vidh->vol_id); - if (vid) - *vid = vol_id; - if (sqnum) - *sqnum = be64_to_cpu(vidh->sqnum); if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) { int lnum = be32_to_cpu(vidh->lnum); @@ -1049,7 +1076,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, if (ec_err) ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d", pnum); - err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + + if (ubi_is_fm_vol(vol_id)) + err = add_fastmap(ai, pnum, vidh, ec); + else + err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + if (err) return err; @@ -1198,6 +1230,10 @@ static void destroy_ai(struct ubi_attach_info *ai) list_del(&aeb->u.list); kmem_cache_free(ai->aeb_slab_cache, aeb); } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } /* Destroy the volume RB-tree */ rb = ai->volumes.rb_node; @@ -1257,7 +1293,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, ai, pnum, NULL, NULL); + err = scan_peb(ubi, ai, pnum); if (err < 0) goto out_vidh; } @@ -1323,6 +1359,7 @@ static struct ubi_attach_info *alloc_ai(void) INIT_LIST_HEAD(&ai->free); INIT_LIST_HEAD(&ai->erase); INIT_LIST_HEAD(&ai->alien); + INIT_LIST_HEAD(&ai->fastmap); ai->volumes = RB_ROOT; ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", sizeof(struct ubi_ainf_peb), @@ -1349,52 +1386,54 @@ static struct ubi_attach_info *alloc_ai(void) */ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) { - int err, pnum, fm_anchor = -1; - unsigned long long max_sqnum = 0; + int err, pnum; + struct ubi_attach_info *scan_ai; err = -ENOMEM; + scan_ai = alloc_ai(); + if (!scan_ai) + goto out; + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); if (!ech) - goto out; + goto out_ai; vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vidh) goto out_ech; for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) { - int vol_id = -1; - unsigned long long sqnum = -1; cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum); + err = scan_peb(ubi, scan_ai, pnum); if (err < 0) goto out_vidh; - - if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) { - max_sqnum = sqnum; - fm_anchor = pnum; - } } ubi_free_vid_hdr(ubi, vidh); kfree(ech); - if (fm_anchor < 0) - return UBI_NO_FASTMAP; + err = ubi_scan_fastmap(ubi, *ai, scan_ai); + if (err) { + /* + * Didn't attach via fastmap, do a full scan but reuse what + * we've aready scanned. + */ + destroy_ai(*ai); + *ai = scan_ai; + } else + destroy_ai(scan_ai); - destroy_ai(*ai); - *ai = alloc_ai(); - if (!*ai) - return -ENOMEM; - - return ubi_scan_fastmap(ubi, *ai, fm_anchor); + return err; out_vidh: ubi_free_vid_hdr(ubi, vidh); out_ech: kfree(ech); +out_ai: + destroy_ai(scan_ai); out: return err; } diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index bba7dd1b5ebf..e724a363cef3 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -850,28 +850,58 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, return ret; } +/** + * find_fm_anchor - find the most recent Fastmap superblock (anchor) + * @ai: UBI attach info to be filled + */ +static int find_fm_anchor(struct ubi_attach_info *ai) +{ + int ret = -1; + struct ubi_ainf_peb *aeb; + unsigned long long max_sqnum = 0; + + list_for_each_entry(aeb, &ai->fastmap, u.list) { + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) { + max_sqnum = aeb->sqnum; + ret = aeb->pnum; + } + } + + return ret; +} + /** * ubi_scan_fastmap - scan the fastmap. * @ubi: UBI device object * @ai: UBI attach info to be filled - * @fm_anchor: The fastmap starts at this PEB + * @scan_ai: UBI attach info from the first 64 PEBs, + * used to find the most recent Fastmap data structure * * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found, * UBI_BAD_FASTMAP if one was found but is not usable. * < 0 indicates an internal error. */ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, - int fm_anchor) + struct ubi_attach_info *scan_ai) { struct ubi_fm_sb *fmsb, *fmsb2; struct ubi_vid_hdr *vh; struct ubi_ec_hdr *ech; struct ubi_fastmap_layout *fm; - int i, used_blocks, pnum, ret = 0; + struct ubi_ainf_peb *tmp_aeb, *aeb; + int i, used_blocks, pnum, fm_anchor, ret = 0; size_t fm_size; __be32 crc, tmp_crc; unsigned long long sqnum = 0; + fm_anchor = find_fm_anchor(scan_ai); + if (fm_anchor < 0) + return UBI_NO_FASTMAP; + + /* Move all (possible) fastmap blocks into our new attach structure. */ + list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list) + list_move_tail(&aeb->u.list, &ai->fastmap); + down_write(&ubi->fm_protect); memset(ubi->fm_buf, 0, ubi->fm_size); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 086ff56922b5..051976caf8f0 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -699,6 +699,8 @@ struct ubi_ainf_volume { * @erase: list of physical eraseblocks which have to be erased * @alien: list of physical eraseblocks which should not be used by UBI (e.g., * those belonging to "preserve"-compatible internal volumes) + * @fastmap: list of physical eraseblocks which relate to fastmap (e.g., + * eraseblocks of the current and not yet erased old fastmap blocks) * @corr_peb_count: count of PEBs in the @corr list * @empty_peb_count: count of PEBs which are presumably empty (contain only * 0xFF bytes) @@ -727,6 +729,7 @@ struct ubi_attach_info { struct list_head free; struct list_head erase; struct list_head alien; + struct list_head fastmap; int corr_peb_count; int empty_peb_count; int alien_peb_count; @@ -907,7 +910,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, size_t ubi_calc_fm_size(struct ubi_device *ubi); int ubi_update_fastmap(struct ubi_device *ubi); int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, - int fm_anchor); + struct ubi_attach_info *scan_ai); #else static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } #endif @@ -1116,4 +1119,27 @@ static inline bool ubi_is_fm_vol(int vol_id) return false; } +/** + * ubi_find_fm_block - check whether a PEB is part of the current Fastmap. + * @ubi: UBI device description object + * @pnum: physical eraseblock to look for + * + * This function returns a wear leveling object if @pnum relates to the current + * fastmap, @NULL otherwise. + */ +static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi, + int pnum) +{ + int i; + + if (ubi->fm) { + for (i = 0; i < ubi->fm->used_blocks; i++) { + if (ubi->fm->e[i]->pnum == pnum) + return ubi->fm->e[i]; + } + } + + return NULL; +} + #endif /* !__UBI_UBI_H__ */ diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index ca9746f41ff1..7eb1f3fb16e2 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1598,19 +1598,44 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) } } + list_for_each_entry(aeb, &ai->fastmap, u.list) { + cond_resched(); + + e = ubi_find_fm_block(ubi, aeb->pnum); + + if (e) { + ubi_assert(!ubi->lookuptbl[e->pnum]); + ubi->lookuptbl[e->pnum] = e; + } else { + /* + * Usually old Fastmap PEBs are scheduled for erasure + * and we don't have to care about them but if we face + * an power cut before scheduling them we need to + * take care of them here. + */ + if (ubi->lookuptbl[aeb->pnum]) + continue; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi_assert(!ubi->lookuptbl[e->pnum]); + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { + wl_entry_destroy(ubi, e); + goto out_free; + } + } + + found_pebs++; + } + dbg_wl("found %i PEBs", found_pebs); - if (ubi->fm) { - ubi_assert(ubi->good_peb_count == - found_pebs + ubi->fm->used_blocks); - - for (i = 0; i < ubi->fm->used_blocks; i++) { - e = ubi->fm->e[i]; - ubi->lookuptbl[e->pnum] = e; - } - } - else - ubi_assert(ubi->good_peb_count == found_pebs); + ubi_assert(ubi->good_peb_count == found_pebs); reserved_pebs = WL_RESERVED_PEBS; ubi_fastmap_init(ubi, &reserved_pebs); From faf2b8d929a47809eab04f17e21f44ebae377dc6 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 14 Jun 2016 10:12:17 +0200 Subject: [PATCH 104/519] ubi: Be more paranoid while seaching for the most recent Fastmap commit 74f2c6e9a47cf4e508198c8594626cc82906a13d upstream. Since PEB erasure is asynchornous it can happen that there is more than one Fastmap on the MTD. This is fine because the attach logic will pick the Fastmap data structure with the highest sequence number. On a not so well configured MTD stack spurious ECC errors are common. Causes can be different, bad hardware, wrong operating modes, etc... If the most current Fastmap renders bad due to ECC errors UBI might pick an older Fastmap to attach from. While this can only happen on an anyway broken setup it will show completely different sympthoms and makes finding the root cause much more difficult. So, be debug friendly and fall back to scanning mode of we're facing an ECC error while scanning for Fastmap. Cc: Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/attach.c | 28 ++++++++++++++++++++++++---- drivers/mtd/ubi/ubi.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index 68ff57435c89..5cde3ad1665e 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -856,13 +856,15 @@ static bool vol_ignored(int vol_id) * @ubi: UBI device description object * @ai: attaching information * @pnum: the physical eraseblock number + * @fast: true if we're scanning for a Fastmap * * This function reads UBI headers of PEB @pnum, checks them, and adds * information about this PEB to the corresponding list or RB-tree in the * "attaching info" structure. Returns zero if the physical eraseblock was * successfully handled and a negative error code in case of failure. */ -static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum) +static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, + int pnum, bool fast) { long long ec; int err, bitflips = 0, vol_id = -1, ec_err = 0; @@ -980,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum */ ai->maybe_bad_peb_count += 1; case UBI_IO_BAD_HDR: + /* + * If we're facing a bad VID header we have to drop *all* + * Fastmap data structures we find. The most recent Fastmap + * could be bad and therefore there is a chance that we attach + * from an old one. On a fine MTD stack a PEB must not render + * bad all of a sudden, but the reality is different. + * So, let's be paranoid and help finding the root cause by + * falling back to scanning mode instead of attaching with a + * bad EBA table and cause data corruption which is hard to + * analyze. + */ + if (fast) + ai->force_full_scan = 1; + if (ec_err) /* * Both headers are corrupted. There is a possibility @@ -1293,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, ai, pnum); + err = scan_peb(ubi, ai, pnum, false); if (err < 0) goto out_vidh; } @@ -1407,7 +1423,7 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, scan_ai, pnum); + err = scan_peb(ubi, scan_ai, pnum, true); if (err < 0) goto out_vidh; } @@ -1415,7 +1431,11 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) ubi_free_vid_hdr(ubi, vidh); kfree(ech); - err = ubi_scan_fastmap(ubi, *ai, scan_ai); + if (scan_ai->force_full_scan) + err = UBI_NO_FASTMAP; + else + err = ubi_scan_fastmap(ubi, *ai, scan_ai); + if (err) { /* * Didn't attach via fastmap, do a full scan but reuse what diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 051976caf8f0..05d9ec66437c 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -711,6 +711,8 @@ struct ubi_ainf_volume { * @vols_found: number of volumes found * @highest_vol_id: highest volume ID * @is_empty: flag indicating whether the MTD device is empty or not + * @force_full_scan: flag indicating whether we need to do a full scan and drop + all existing Fastmap data structures * @min_ec: lowest erase counter value * @max_ec: highest erase counter value * @max_sqnum: highest sequence number value @@ -738,6 +740,7 @@ struct ubi_attach_info { int vols_found; int highest_vol_id; int is_empty; + int force_full_scan; int min_ec; int max_ec; unsigned long long max_sqnum; From 000b4c28bb28d471662a7d8fed80c9f511afe4cf Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 24 Aug 2016 14:36:14 +0200 Subject: [PATCH 105/519] ubi: Fix races around ubi_refill_pools() commit 2e8f08deabbc7eefe4c5838aaa6aa9a23a8acf2e upstream. When writing a new Fastmap the first thing that happens is refilling the pools in memory. At this stage it is possible that new PEBs from the new pools get already claimed and written with data. If this happens before the new Fastmap data structure hits the flash and we face power cut the freshly written PEB will not scanned and unnoticed. Solve the issue by locking the pools until Fastmap is written. Cc: Fixes: dbb7d2a88d ("UBI: Add fastmap core") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/eba.c | 4 ++-- drivers/mtd/ubi/fastmap-wl.c | 6 ++++-- drivers/mtd/ubi/fastmap.c | 14 ++++++++++---- drivers/mtd/ubi/wl.c | 20 ++++++++++++++------ 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index c4a25c858c07..03cf0553ec1b 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1178,6 +1178,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_volume *vol; uint32_t crc; + ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem)); + vol_id = be32_to_cpu(vid_hdr->vol_id); lnum = be32_to_cpu(vid_hdr->lnum); @@ -1346,9 +1348,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, } ubi_assert(vol->eba_tbl[lnum] == from); - down_read(&ubi->fm_eba_sem); vol->eba_tbl[lnum] = to; - up_read(&ubi->fm_eba_sem); out_unlock_buf: mutex_unlock(&ubi->buf_mutex); diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index ed62f1efe6eb..69dd21679a30 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -262,6 +262,8 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) struct ubi_fm_pool *pool = &ubi->fm_wl_pool; int pnum; + ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem)); + if (pool->used == pool->size) { /* We cannot update the fastmap here because this * function is called in atomic context. @@ -303,7 +305,7 @@ int ubi_ensure_anchor_pebs(struct ubi_device *ubi) wrk->anchor = 1; wrk->func = &wear_leveling_worker; - schedule_ubi_work(ubi, wrk); + __schedule_ubi_work(ubi, wrk); return 0; } @@ -344,7 +346,7 @@ int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e, spin_unlock(&ubi->wl_lock); vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID; - return schedule_erase(ubi, e, vol_id, lnum, torture); + return schedule_erase(ubi, e, vol_id, lnum, torture, true); } /** diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index e724a363cef3..c5477299b66b 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1514,22 +1514,30 @@ int ubi_update_fastmap(struct ubi_device *ubi) struct ubi_wl_entry *tmp_e; down_write(&ubi->fm_protect); + down_write(&ubi->work_sem); + down_write(&ubi->fm_eba_sem); ubi_refill_pools(ubi); if (ubi->ro_mode || ubi->fm_disabled) { + up_write(&ubi->fm_eba_sem); + up_write(&ubi->work_sem); up_write(&ubi->fm_protect); return 0; } ret = ubi_ensure_anchor_pebs(ubi); if (ret) { + up_write(&ubi->fm_eba_sem); + up_write(&ubi->work_sem); up_write(&ubi->fm_protect); return ret; } new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL); if (!new_fm) { + up_write(&ubi->fm_eba_sem); + up_write(&ubi->work_sem); up_write(&ubi->fm_protect); return -ENOMEM; } @@ -1638,16 +1646,14 @@ int ubi_update_fastmap(struct ubi_device *ubi) new_fm->e[0] = tmp_e; } - down_write(&ubi->work_sem); - down_write(&ubi->fm_eba_sem); ret = ubi_write_fastmap(ubi, new_fm); - up_write(&ubi->fm_eba_sem); - up_write(&ubi->work_sem); if (ret) goto err; out_unlock: + up_write(&ubi->fm_eba_sem); + up_write(&ubi->work_sem); up_write(&ubi->fm_protect); kfree(old_fm); return ret; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 7eb1f3fb16e2..d49e96f94f46 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -580,7 +580,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, * failure. */ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, - int vol_id, int lnum, int torture) + int vol_id, int lnum, int torture, bool nested) { struct ubi_work *wl_wrk; @@ -599,7 +599,10 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, wl_wrk->lnum = lnum; wl_wrk->torture = torture; - schedule_ubi_work(ubi, wl_wrk); + if (nested) + __schedule_ubi_work(ubi, wl_wrk); + else + schedule_ubi_work(ubi, wl_wrk); return 0; } @@ -658,6 +661,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, if (!vid_hdr) return -ENOMEM; + down_read(&ubi->fm_eba_sem); mutex_lock(&ubi->move_mutex); spin_lock(&ubi->wl_lock); ubi_assert(!ubi->move_from && !ubi->move_to); @@ -884,6 +888,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, dbg_wl("done"); mutex_unlock(&ubi->move_mutex); + up_read(&ubi->fm_eba_sem); return 0; /* @@ -925,6 +930,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, } mutex_unlock(&ubi->move_mutex); + up_read(&ubi->fm_eba_sem); return 0; out_error: @@ -946,6 +952,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, out_ro: ubi_ro_mode(ubi); mutex_unlock(&ubi->move_mutex); + up_read(&ubi->fm_eba_sem); ubi_assert(err != 0); return err < 0 ? err : -EIO; @@ -953,6 +960,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); mutex_unlock(&ubi->move_mutex); + up_read(&ubi->fm_eba_sem); ubi_free_vid_hdr(ubi, vid_hdr); return 0; } @@ -1075,7 +1083,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) int err1; /* Re-schedule the LEB for erasure */ - err1 = schedule_erase(ubi, e, vol_id, lnum, 0); + err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false); if (err1) { wl_entry_destroy(ubi, e); err = err1; @@ -1256,7 +1264,7 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, } spin_unlock(&ubi->wl_lock); - err = schedule_erase(ubi, e, vol_id, lnum, torture); + err = schedule_erase(ubi, e, vol_id, lnum, torture, false); if (err) { spin_lock(&ubi->wl_lock); wl_tree_add(e, &ubi->used); @@ -1544,7 +1552,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) e->pnum = aeb->pnum; e->ec = aeb->ec; ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { wl_entry_destroy(ubi, e); goto out_free; } @@ -1624,7 +1632,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) e->ec = aeb->ec; ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { wl_entry_destroy(ubi, e); goto out_free; } From a5f958c4eadb8c9214c75b69330d4b5aa03d16e6 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 24 Aug 2016 14:36:15 +0200 Subject: [PATCH 106/519] ubi: Fix Fastmap's update_vol() commit f7d11b33d4e8cedf19367c09b891bbc705163976 upstream. Usually Fastmap is free to consider every PEB in one of the pools as newer than the existing PEB. Since PEBs in a pool are by definition newer than everything else. But update_vol() missed the case that a pool can contain more than one candidate. Cc: Fixes: dbb7d2a88d ("UBI: Add fastmap core") Signed-off-by: Richard Weinberger Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/fastmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index c5477299b66b..72e89b352034 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -326,6 +326,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai, aeb->pnum = new_aeb->pnum; aeb->copy_flag = new_vh->copy_flag; aeb->scrub = new_aeb->scrub; + aeb->sqnum = new_aeb->sqnum; kmem_cache_free(ai->aeb_slab_cache, new_aeb); /* new_aeb is older */ From f891ee97d9df8407ba1a46f9a7b89d8d57a70b7a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 16:01:20 +0100 Subject: [PATCH 107/519] ubi: fastmap: Erase outdated anchor PEBs during attach commit f78e5623f45bab2b726eec29dc5cefbbab2d0b1c upstream. The fastmap update code might erase the current fastmap anchor PEB in case it doesn't find any new free PEB. When a power cut happens in this situation we must not have any outdated fastmap anchor PEB on the device, because that would be used to attach during next boot. The easiest way to make that sure is to erase all outdated fastmap anchor PEBs synchronously during attach. Signed-off-by: Sascha Hauer Reviewed-by: Richard Weinberger Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Cc: Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 77 ++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index d49e96f94f46..b3c1b8106a68 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1508,6 +1508,46 @@ static void shutdown_work(struct ubi_device *ubi) } } +/** + * erase_aeb - erase a PEB given in UBI attach info PEB + * @ubi: UBI device description object + * @aeb: UBI attach info PEB + * @sync: If true, erase synchronously. Otherwise schedule for erasure + */ +static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) +{ + struct ubi_wl_entry *e; + int err; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (sync) { + err = sync_erase(ubi, e, false); + if (err) + goto out_free; + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } else { + err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); + if (err) + goto out_free; + } + + return 0; + +out_free: + wl_entry_destroy(ubi, e); + + return err; +} + /** * ubi_wl_init - initialize the WL sub-system using attaching information. * @ubi: UBI device description object @@ -1545,18 +1585,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + err = erase_aeb(ubi, aeb, false); + if (err) goto out_free; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); - goto out_free; - } - found_pebs++; } @@ -1615,6 +1647,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { + bool sync = false; + /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face @@ -1624,18 +1658,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->lookuptbl[aeb->pnum]) continue; - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + /* + * The fastmap update code might not find a free PEB for + * writing the fastmap anchor to and then reuses the + * current fastmap anchor PEB. When this PEB gets erased + * and a power cut happens before it is written again we + * must make sure that the fastmap attach code doesn't + * find any outdated fastmap anchors, hence we erase the + * outdated fastmap anchor PEBs synchronously here. + */ + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) + sync = true; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi_assert(!ubi->lookuptbl[e->pnum]); - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); + err = erase_aeb(ubi, aeb, sync); + if (err) goto out_free; - } } found_pebs++; From 762b585c492fedda1b0bc4c6d0a867307bf7cd0f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 Jul 2018 10:18:33 +0200 Subject: [PATCH 108/519] Linux 4.4.144 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 54690fee0485..63f3e2438a26 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 143 +SUBLEVEL = 144 EXTRAVERSION = NAME = Blurry Fish Butt From 54a634c43ae75031c03126957edf667dba1439ec Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 20 Jul 2018 13:58:21 +0200 Subject: [PATCH 109/519] MIPS: ath79: fix register address in ath79_ddr_wb_flush() commit bc88ad2efd11f29e00a4fd60fcd1887abfe76833 upstream. ath79_ddr_wb_flush_base has the type void __iomem *, so register offsets need to be a multiple of 4 in order to access the intended register. Signed-off-by: Felix Fietkau Signed-off-by: John Crispin Signed-off-by: Paul Burton Fixes: 24b0e3e84fbf ("MIPS: ath79: Improve the DDR controller interface") Patchwork: https://patchwork.linux-mips.org/patch/19912/ Cc: Alban Bedel Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 8ae4067a5eda..40ecb6e700cd 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init); void ath79_ddr_wb_flush(u32 reg) { - void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg; + void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4); /* Flush the DDR write buffer. */ __raw_writel(0x1, flush_reg); From 48f41c0c5781e3450830f8ec68513bfa18a5451b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Jul 2018 16:50:48 +0200 Subject: [PATCH 110/519] ip: hash fragments consistently [ Upstream commit 3dd1c9a1270736029ffca670e9bd0265f4120600 ] The skb hash for locally generated ip[v6] fragments belonging to the same datagram can vary in several circumstances: * for connected UDP[v6] sockets, the first fragment get its hash via set_owner_w()/skb_set_hash_from_sk() * for unconnected IPv6 UDPv6 sockets, the first fragment can get its hash via ip6_make_flowlabel()/skb_get_hash_flowi6(), if auto_flowlabel is enabled For the following frags the hash is usually computed via skb_get_hash(). The above can cause OoO for unconnected IPv6 UDPv6 socket: in that scenario the egress tx queue can be selected on a per packet basis via the skb hash. It may also fool flow-oriented schedulers to place fragments belonging to the same datagram in different flows. Fix the issue by copying the skb hash from the head frag into the others at fragmentation time. Before this commit: perf probe -a "dev_queue_xmit skb skb->hash skb->l4_hash:b1@0/8 skb->sw_hash:b1@1/8" netperf -H $IPV4 -t UDP_STREAM -l 5 -- -m 2000 -n & perf record -e probe:dev_queue_xmit -e probe:skb_set_owner_w -a sleep 0.1 perf script probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=3713014309 l4_hash=1 sw_hash=0 probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=0 l4_hash=0 sw_hash=0 After this commit: probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0 probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0 Fixes: b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 2 ++ net/ipv6/ip6_output.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 10286432f684..c11bb6d2d00a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -480,6 +480,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->dev = from->dev; to->mark = from->mark; + skb_copy_hash(to, from); + /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 74786783834b..0feede45bd28 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -559,6 +559,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->dev = from->dev; to->mark = from->mark; + skb_copy_hash(to, from); + #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif From 73dad08709fee646875f5a81c07b05fb54582732 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 24 Jul 2018 14:27:55 +0300 Subject: [PATCH 111/519] net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper [ Upstream commit 958c696f5a7274d9447a458ad7aa70719b29a50a ] Function mlx4_RST2INIT_QP_wrapper saved the qp number passed in the qp context, rather than the one passed in the input modifier. However, the qp number in the qp context is not defined as a required parameter by the FW. Therefore, drivers may choose to not specify the qp number in the qp context for the reset-to-init transition. Thus, we must save the qp number passed in the command input modifier -- which is always present. (This saved qp number is used as the input modifier for command 2RST_QP when a slave's qp's are destroyed). Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index e3080fbd9d00..7911dc3da98e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2891,7 +2891,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, u32 srqn = qp_get_srqn(qpc) & 0xffffff; int use_srq = (qp_get_srqn(qpc) >> 24) & 1; struct res_srq *srq; - int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; + int local_qpn = vhcr->in_modifier & 0xffffff; err = adjust_qp_sched_queue(dev, slave, qpc, inbox); if (err) From b04c9a08710606e2fa0a16f24541a270c00ef58c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 20 Jul 2018 13:21:01 -0700 Subject: [PATCH 112/519] rtnetlink: add rtnl_link_state check in rtnl_configure_link [ Upstream commit 5025f7f7d506fba9b39e7fe8ca10f6f34cb9bc2d ] rtnl_configure_link sets dev->rtnl_link_state to RTNL_LINK_INITIALIZED and unconditionally calls __dev_notify_flags to notify user-space of dev flags. current call sequence for rtnl_configure_link rtnetlink_newlink rtnl_link_ops->newlink rtnl_configure_link (unconditionally notifies userspace of default and new dev flags) If a newlink handler wants to call rtnl_configure_link early, we will end up with duplicate notifications to user-space. This patch fixes rtnl_configure_link to check rtnl_link_state and call __dev_notify_flags with gchanges = 0 if already RTNL_LINK_INITIALIZED. Later in the series, this patch will help the following sequence where a driver implementing newlink can call rtnl_configure_link to initialize the link early. makes the following call sequence work: rtnetlink_newlink rtnl_link_ops->newlink (vxlan) -> rtnl_configure_link (initializes link and notifies user-space of default dev flags) rtnl_configure_link (updates dev flags if requested by user ifm and notifies user-space of new dev flags) Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2017ffa5197a..96c9c0f0905a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2087,9 +2087,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) return err; } - dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - - __dev_notify_flags(dev, old_flags, ~0U); + if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { + __dev_notify_flags(dev, old_flags, 0U); + } else { + dev->rtnl_link_state = RTNL_LINK_INITIALIZED; + __dev_notify_flags(dev, old_flags, ~0U); + } return 0; } EXPORT_SYMBOL(rtnl_configure_link); From 500e03f463835e74c75890d56d9a7ab63755aa2d Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jul 2018 06:04:52 -0700 Subject: [PATCH 113/519] tcp: fix dctcp delayed ACK schedule [ Upstream commit b0c05d0e99d98d7f0cd41efc1eeec94efdc3325d ] Previously, when a data segment was sent an ACK was piggybacked on the data segment without generating a CA_EVENT_NON_DELAYED_ACK event to notify congestion control modules. So the DCTCP ca->delayed_ack_reserved flag could incorrectly stay set when in fact there were no delayed ACKs being reserved. This could result in sending a special ECN notification ACK that carries an older ACK sequence, when in fact there was no need for such an ACK. DCTCP keeps track of the delayed ACK status with its own separate state ca->delayed_ack_reserved. Previously it may accidentally cancel the delayed ACK without updating this field upon sending a special ACK that carries a older ACK sequence. This inconsistency would lead to DCTCP receiver never acknowledging the latest data until the sender times out and retry in some cases. Packetdrill script (provided by Larry Brakmo) 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < [ect0] SEW 0:0(0) win 32792 0.100 > SE. 0:0(0) ack 1 0.110 < [ect0] . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 0.200 < [ect0] . 1:1001(1000) ack 1 win 257 0.200 > [ect01] . 1:1(0) ack 1001 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 1:2(1) ack 1001 0.200 < [ect0] . 1001:2001(1000) ack 2 win 257 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 2:3(1) ack 2001 0.200 < [ect0] . 2001:3001(1000) ack 3 win 257 0.200 < [ect0] . 3001:4001(1000) ack 3 win 257 0.200 > [ect01] . 3:3(0) ack 4001 0.210 < [ce] P. 4001:4501(500) ack 3 win 257 +0.001 read(4, ..., 4500) = 4500 +0 write(4, ..., 1) = 1 +0 > [ect01] PE. 3:4(1) ack 4501 +0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257 // Previously the ACK sequence below would be 4501, causing a long RTO +0.040~+0.045 > [ect01] . 4:4(0) ack 5501 // delayed ack +0.311 < [ect0] . 5501:6501(1000) ack 4 win 257 // More data +0 > [ect01] . 4:4(0) ack 6501 // now acks everything +0.500 < F. 9501:9501(0) ack 4 win 257 Reported-by: Larry Brakmo Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_dctcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 55d7da1d2ce9..e772679a5ded 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -134,7 +134,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) /* State has changed from CE=0 to CE=1 and delayed * ACK has not sent yet. */ - if (!ca->ce_state && ca->delayed_ack_reserved) { + if (!ca->ce_state && + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { u32 tmp_rcv_nxt; /* Save current rcv_nxt. */ @@ -164,7 +165,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) /* State has changed from CE=1 to CE=0 and delayed * ACK has not sent yet. */ - if (ca->ce_state && ca->delayed_ack_reserved) { + if (ca->ce_state && + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { u32 tmp_rcv_nxt; /* Save current rcv_nxt. */ From 17fea38e74ab24afb06970bbd9dc52db11a8034b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Jul 2018 13:56:34 -0700 Subject: [PATCH 114/519] tcp: helpers to send special DCTCP ack [ Upstream commit 2987babb6982306509380fc11b450227a844493b ] Refactor and create helpers to send the special ACK in DCTCP. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2854db094864..cbba484d4f5e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -901,8 +901,8 @@ void tcp_wfree(struct sk_buff *skb) * We are working here with either a clone of the original * SKB, or a fresh unique copy made by the retransmit engine. */ -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - gfp_t gfp_mask) +static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, + int clone_it, gfp_t gfp_mask, u32 rcv_nxt) { const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; @@ -962,7 +962,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->source = inet->inet_sport; th->dest = inet->inet_dport; th->seq = htonl(tcb->seq); - th->ack_seq = htonl(tp->rcv_nxt); + th->ack_seq = htonl(rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->tcp_flags); @@ -1036,6 +1036,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, return net_xmit_eval(err); } +static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask) +{ + return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, + tcp_sk(sk)->rcv_nxt); +} + /* This routine just queues the buffer for sending. * * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, @@ -3354,7 +3361,7 @@ void tcp_send_delayed_ack(struct sock *sk) } /* This routine sends an ack and also updates the window. */ -void tcp_send_ack(struct sock *sk) +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) { struct sk_buff *buff; @@ -3391,7 +3398,12 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); - tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); + __tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt); +} + +void tcp_send_ack(struct sock *sk) +{ + __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); } EXPORT_SYMBOL_GPL(tcp_send_ack); From 0b1d40e9e7738e3396ce414b1c62b911c285dfa3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Jul 2018 13:56:35 -0700 Subject: [PATCH 115/519] tcp: do not cancel delay-AcK on DCTCP special ACK [ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ] Currently when a DCTCP receiver delays an ACK and receive a data packet with a different CE mark from the previous one's, it sends two immediate ACKs acking previous and latest sequences respectly (for ECN accounting). Previously sending the first ACK may mark off the delayed ACK timer (tcp_event_ack_sent). This may subsequently prevent sending the second ACK to acknowledge the latest sequence (tcp_ack_snd_check). The culprit is that tcp_send_ack() assumes it always acknowleges the latest sequence, which is not true for the first special ACK. The fix is to not make the assumption in tcp_send_ack and check the actual ack sequence before cancelling the delayed ACK. Further it's safer to pass the ack sequence number as a local variable into tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid future bugs like this. Reported-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 1 + net/ipv4/tcp_dctcp.c | 34 ++++------------------------------ net/ipv4/tcp_output.c | 11 ++++++++--- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a3696b778757..24ba8f005f01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -559,6 +559,7 @@ void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index e772679a5ded..c48badee1cfa 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -135,21 +135,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) * ACK has not sent yet. */ if (!ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - u32 tmp_rcv_nxt; - - /* Save current rcv_nxt. */ - tmp_rcv_nxt = tp->rcv_nxt; - - /* Generate previous ack with CE=0. */ - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; - tp->rcv_nxt = ca->prior_rcv_nxt; - - tcp_send_ack(sk); - - /* Recover current rcv_nxt. */ - tp->rcv_nxt = tmp_rcv_nxt; - } + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 1; @@ -166,21 +153,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) * ACK has not sent yet. */ if (ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - u32 tmp_rcv_nxt; - - /* Save current rcv_nxt. */ - tmp_rcv_nxt = tp->rcv_nxt; - - /* Generate previous ack with CE=1. */ - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; - tp->rcv_nxt = ca->prior_rcv_nxt; - - tcp_send_ack(sk); - - /* Recover current rcv_nxt. */ - tp->rcv_nxt = tmp_rcv_nxt; - } + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cbba484d4f5e..6fa749ce231f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -177,8 +177,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. */ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) +static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, + u32 rcv_nxt) { + struct tcp_sock *tp = tcp_sk(sk); + + if (unlikely(rcv_nxt != tp->rcv_nxt)) + return; /* Special ACK sent by DCTCP to reflect ECN */ tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1005,7 +1010,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, icsk->icsk_af_ops->send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); + tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); if (skb->len != tcp_header_size) tcp_event_data_sent(tp, sk); @@ -3400,12 +3405,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) skb_mstamp_get(&buff->skb_mstamp); __tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt); } +EXPORT_SYMBOL_GPL(__tcp_send_ack); void tcp_send_ack(struct sock *sk) { __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); } -EXPORT_SYMBOL_GPL(tcp_send_ack); /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. From 255924ea891f647451af3acbc40a3730dcb3255e Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Jul 2018 13:56:36 -0700 Subject: [PATCH 116/519] tcp: do not delay ACK in DCTCP upon CE status change [ Upstream commit a0496ef2c23b3b180902dd185d0d63ccbc624cf8 ] Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change has to be sent immediately so the sender can respond quickly: """ When receiving packets, the CE codepoint MUST be processed as follows: 1. If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to true and send an immediate ACK. 2. If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE to false and send an immediate ACK. """ Previously DCTCP implementation may continue to delay the ACK. This patch fixes that to implement the RFC by forcing an immediate ACK. Tested with this packetdrill script provided by Larry Brakmo 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < [ect0] SEW 0:0(0) win 32792 0.100 > SE. 0:0(0) ack 1 0.110 < [ect0] . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0 0.200 < [ect0] . 1:1001(1000) ack 1 win 257 0.200 > [ect01] . 1:1(0) ack 1001 0.200 write(4, ..., 1) = 1 0.200 > [ect01] P. 1:2(1) ack 1001 0.200 < [ect0] . 1001:2001(1000) ack 2 win 257 +0.005 < [ce] . 2001:3001(1000) ack 2 win 257 +0.000 > [ect01] . 2:2(0) ack 2001 // Previously the ACK below would be delayed by 40ms +0.000 > [ect01] E. 2:2(0) ack 3001 +0.500 < F. 9501:9501(0) ack 4 win 257 Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 1 + net/ipv4/tcp_dctcp.c | 30 ++++++++++++++++++------------ net/ipv4/tcp_input.c | 3 ++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 24ba8f005f01..65babd8a682d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -376,6 +376,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +void tcp_enter_quickack_mode(struct sock *sk); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index c48badee1cfa..e63b764e55ea 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -131,12 +131,15 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - /* State has changed from CE=0 to CE=1 and delayed - * ACK has not sent yet. - */ - if (!ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) - __tcp_send_ack(sk, ca->prior_rcv_nxt); + if (!ca->ce_state) { + /* State has changed from CE=0 to CE=1, force an immediate + * ACK to reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); + tcp_enter_quickack_mode(sk); + } ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 1; @@ -149,12 +152,15 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - /* State has changed from CE=1 to CE=0 and delayed - * ACK has not sent yet. - */ - if (ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) - __tcp_send_ack(sk, ca->prior_rcv_nxt); + if (ca->ce_state) { + /* State has changed from CE=1 to CE=0, force an immediate + * ACK to reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); + tcp_enter_quickack_mode(sk); + } ca->prior_rcv_nxt = tp->rcv_nxt; ca->ce_state = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4350ee058441..782fb6dbf636 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -187,13 +187,14 @@ static void tcp_incr_quickack(struct sock *sk) icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -static void tcp_enter_quickack_mode(struct sock *sk) +void tcp_enter_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk); icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } +EXPORT_SYMBOL(tcp_enter_quickack_mode); /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. From 5fbec4801264cb3279ef6ac9c70bcbe2aaef89d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:18 -0700 Subject: [PATCH 117/519] tcp: avoid collapses in tcp_prune_queue() if possible [ Upstream commit f4a3313d8e2ca9fd8d8f45e40a2903ba782607e7 ] Right after a TCP flow is created, receiving tiny out of order packets allways hit the condition : if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); tcp_clamp_window() increases sk_rcvbuf to match sk_rmem_alloc (guarded by tcp_rmem[2]) Calling tcp_collapse_ofo_queue() in this case is not useful, and offers a O(N^2) surface attack to malicious peers. Better not attempt anything before full queue capacity is reached, forcing attacker to spend lots of resource and allow us to more easily detect the abuse. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 782fb6dbf636..8088e98696aa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4875,6 +4875,9 @@ static int tcp_prune_queue(struct sock *sk) else if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) tcp_collapse(sk, &sk->sk_receive_queue, From dc6ae4dffd656811dee7151b19545e4cd839d378 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2018 09:28:19 -0700 Subject: [PATCH 118/519] tcp: detect malicious patterns in tcp_collapse_ofo_queue() [ Upstream commit 3d4bf93ac12003f9b8e1e2de37fe27983deebdcf ] In case an attacker feeds tiny packets completely out of order, tcp_collapse_ofo_queue() might scan the whole rb-tree, performing expensive copies, but not changing socket memory usage at all. 1) Do not attempt to collapse tiny skbs. 2) Add logic to exit early when too many tiny skbs are detected. We prefer not doing aggressive collapsing (which copies packets) for pathological flows, and revert to tcp_prune_ofo_queue() which will be less expensive. In the future, we might add the possibility of terminating flows that are proven to be malicious. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8088e98696aa..5c645069a09a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4789,6 +4789,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 range_truesize, sum_tiny = 0; struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); struct sk_buff *head; u32 start, end; @@ -4798,6 +4799,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; + range_truesize = skb->truesize; head = skb; for (;;) { @@ -4812,14 +4814,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk) if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, &tp->out_of_order_queue, - head, skb, start, end); + /* Do not attempt collapsing tiny skbs */ + if (range_truesize != head->truesize || + end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { + tcp_collapse(sk, &tp->out_of_order_queue, + head, skb, start, end); + } else { + sum_tiny += range_truesize; + if (sum_tiny > sk->sk_rcvbuf >> 3) + return; + } + head = skb; if (!skb) break; /* Start new segment */ start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; + range_truesize = skb->truesize; } else { if (before(TCP_SKB_CB(skb)->seq, start)) start = TCP_SKB_CB(skb)->seq; From a77bf88daa402cb52729fe4e46e1b7ce58f1f8ad Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 23 Jul 2018 19:36:48 -0400 Subject: [PATCH 119/519] ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull [ Upstream commit 2efd4fca703a6707cad16ab486eaab8fc7f0fd49 ] Syzbot reported a read beyond the end of the skb head when returning IPV6_ORIGDSTADDR: BUG: KMSAN: kernel-infoleak in put_cmsg+0x5ef/0x860 net/core/scm.c:242 CPU: 0 PID: 4501 Comm: syz-executor128 Not tainted 4.17.0+ #9 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1125 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1219 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1261 copy_to_user include/linux/uaccess.h:184 [inline] put_cmsg+0x5ef/0x860 net/core/scm.c:242 ip6_datagram_recv_specific_ctl+0x1cf3/0x1eb0 net/ipv6/datagram.c:719 ip6_datagram_recv_ctl+0x41c/0x450 net/ipv6/datagram.c:733 rawv6_recvmsg+0x10fb/0x1460 net/ipv6/raw.c:521 [..] This logic and its ipv4 counterpart read the destination port from the packet at skb_transport_offset(skb) + 4. With MSG_MORE and a local SOCK_RAW sender, syzbot was able to cook a packet that stores headers exactly up to skb_transport_offset(skb) in the head and the remainder in a frag. Call pskb_may_pull before accessing the pointer to ensure that it lies in skb head. Link: http://lkml.kernel.org/r/CAF=yD-LEJwZj5a1-bAAj2Oy_hKmGygV6rsJ_WOrAYnv-fnayiQ@mail.gmail.com Reported-by: syzbot+9adb4b567003cac781f0@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 7 +++++-- net/ipv6/datagram.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ce9a7fbb7c5f..88426a6a7a85 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -135,15 +135,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; const struct iphdr *iph = ip_hdr(skb); - __be16 *ports = (__be16 *)skb_transport_header(skb); + __be16 *ports; + int end; - if (skb_transport_offset(skb) + 4 > skb->len) + end = skb_transport_offset(skb) + 4; + if (end > 0 && !pskb_may_pull(skb, end)) return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ + ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; sin.sin_addr.s_addr = iph->daddr; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cae37bfd12ab..9f6e57ded338 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -657,13 +657,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports = (__be16 *) skb_transport_header(skb); + __be16 *ports; + int end; - if (skb_transport_offset(skb) + 4 <= skb->len) { + end = skb_transport_offset(skb) + 4; + if (end <= 0 || pskb_may_pull(skb, end)) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ + ports = (__be16 *)skb_transport_header(skb); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; From 92197cdb913a3f130b70550c856afc27cf710ea9 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 10 Jul 2018 08:28:49 +0200 Subject: [PATCH 120/519] usb: cdc_acm: Add quirk for Castles VEGA3000 commit 1445cbe476fc3dd09c0b380b206526a49403c071 upstream. The device (a POS terminal) implements CDC ACM, but has not union descriptor. Signed-off-by: Lubomir Rintel Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7ed30d0b5273..a501f3ba6a3f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1771,6 +1771,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ }, + { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */ + .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, From ff180bcc594676b355476237a382ea10c6492c34 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Thu, 19 Jul 2018 14:39:37 -0500 Subject: [PATCH 121/519] usb: core: handle hub C_PORT_OVER_CURRENT condition commit 249a32b7eeb3edb6897dd38f89651a62163ac4ed upstream. Based on USB2.0 Spec Section 11.12.5, "If a hub has per-port power switching and per-port current limiting, an over-current on one port may still cause the power on another port to fall below specific minimums. In this case, the affected port is placed in the Power-Off state and C_PORT_OVER_CURRENT is set for the port, but PORT_OVER_CURRENT is not set." so let's check C_PORT_OVER_CURRENT too for over current condition. Fixes: 08d1dec6f405 ("usb:hub set hub->change_bits when over-current happens") Cc: Tested-by: Alessandro Antenucci Signed-off-by: Bin Liu Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4d86da0df131..93756664592a 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1123,10 +1123,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) if (!udev || udev->state == USB_STATE_NOTATTACHED) { /* Tell hub_wq to disconnect the device or - * check for a new connection + * check for a new connection or over current condition. + * Based on USB2.0 Spec Section 11.12.5, + * C_PORT_OVER_CURRENT could be set while + * PORT_OVER_CURRENT is not. So check for any of them. */ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || - (portstatus & USB_PORT_STAT_OVERCURRENT)) + (portstatus & USB_PORT_STAT_OVERCURRENT) || + (portchange & USB_PORT_STAT_C_OVERCURRENT)) set_bit(port1, hub->change_bits); } else if (portstatus & USB_PORT_STAT_ENABLE) { From c420866afc161203d17f1fcd965a27a61ef70dd4 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 2 Jul 2018 12:48:08 -0700 Subject: [PATCH 122/519] usb: gadget: f_fs: Only return delayed status when len is 0 commit 4d644abf25698362bd33d17c9ddc8f7122c30f17 upstream. Commit 1b9ba000 ("Allow function drivers to pause control transfers") states that USB_GADGET_DELAYED_STATUS is only supported if data phase is 0 bytes. It seems that when the length is not 0 bytes, there is no need to explicitly delay the data stage since the transfer is not completed until the user responds. However, when the length is 0, there is no data stage and the transfer is finished once setup() returns, hence there is a need to explicitly delay completion. This manifests as the following bugs: Prior to 946ef68ad4e4 ('Let setup() return USB_GADGET_DELAYED_STATUS'), when setup is 0 bytes, ffs would require user to queue a 0 byte request in order to clear setup state. However, that 0 byte request was actually not needed and would hang and cause errors in other setup requests. After the above commit, 0 byte setups work since the gadget now accepts empty queues to ep0 to clear the delay, but all other setups hang. Fixes: 946ef68ad4e4 ("Let setup() return USB_GADGET_DELAYED_STATUS") Signed-off-by: Jerry Zhang Cc: stable Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 4191feb765b1..4800bb22cdd6 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3037,7 +3037,7 @@ static int ffs_func_setup(struct usb_function *f, __ffs_event_add(ffs, FUNCTIONFS_SETUP); spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags); - return USB_GADGET_DELAYED_STATUS; + return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; } static void ffs_func_suspend(struct usb_function *f) From 0b14de0538aaa6c9b8c5d90b29e0a8dd698ba918 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Jul 2018 14:51:33 +0200 Subject: [PATCH 123/519] driver core: Partially revert "driver core: correct device's shutdown order" commit 722e5f2b1eec7de61117b7c0a7914761e3da2eda upstream. Commit 52cdbdd49853 (driver core: correct device's shutdown order) introduced a regression by breaking device shutdown on some systems. Namely, the devices_kset_move_last() call in really_probe() added by that commit is a mistake as it may cause parents to follow children in the devices_kset list which then causes shutdown to fail. For example, if a device has children before really_probe() is called for it (which is not uncommon), that call will cause it to be reordered after the children in the devices_kset list and the ordering of that list will not reflect the correct device shutdown order any more. Also it causes the devices_kset list to be constantly reordered until all drivers have been probed which is totally pointless overhead in the majority of cases and it only covered an issue with system shutdown, while system-wide suspend/resume potentially had the same issue on the affected platforms (which was not covered). Moreover, the shutdown issue originally addressed by the change in really_probe() made by commit 52cdbdd49853 is not present in 4.18-rc any more, since dra7 started to use the sdhci-omap driver which doesn't disable any regulators during shutdown, so the really_probe() part of commit 52cdbdd49853 can be safely reverted. [The original issue was related to the omap_hsmmc driver used by dra7 previously.] For the above reasons, revert the really_probe() modifications made by commit 52cdbdd49853. The other code changes made by commit 52cdbdd49853 are useful and they need not be reverted. Fixes: 52cdbdd49853 (driver core: correct device's shutdown order) Link: https://lore.kernel.org/lkml/CAFgQCTt7VfqM=UyCnvNFxrSw8Z6cUtAi3HUwR4_xPAc03SgHjQ@mail.gmail.com/ Reported-by: Pingfan Liu Tested-by: Pingfan Liu Reviewed-by: Kishon Vijay Abraham I Signed-off-by: Rafael J. Wysocki Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a641cf3ccad6..1dffb018a7fe 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -304,14 +304,6 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto probe_failed; } - /* - * Ensure devices are listed in devices_kset in correct order - * It's important to move Dev to the end of devices_kset before - * calling .probe, because it could be recursive and parent Dev - * should always go first - */ - devices_kset_move_last(dev); - if (dev->bus->probe) { ret = dev->bus->probe(dev); if (ret) From e4c557649fda285e512f28a771038180cdeff6cf Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 7 Feb 2017 17:01:14 +0200 Subject: [PATCH 124/519] can: xilinx_can: fix RX loop if RXNEMP is asserted without RXOK commit 32852c561bffd613d4ed7ec464b1e03e1b7b6c5c upstream. If the device gets into a state where RXNEMP (RX FIFO not empty) interrupt is asserted without RXOK (new frame received successfully) interrupt being asserted, xcan_rx_poll() will continue to try to clear RXNEMP without actually reading frames from RX FIFO. If the RX FIFO is not empty, the interrupt will not be cleared and napi_schedule() will just be called again. This situation can occur when: (a) xcan_rx() returns without reading RX FIFO due to an error condition. The code tries to clear both RXOK and RXNEMP but RXNEMP will not clear due to a frame still being in the FIFO. The frame will never be read from the FIFO as RXOK is no longer set. (b) A frame is received between xcan_rx_poll() reading interrupt status and clearing RXOK. RXOK will be cleared, but RXNEMP will again remain set as the new message is still in the FIFO. I'm able to trigger case (b) by flooding the bus with frames under load. There does not seem to be any benefit in using both RXNEMP and RXOK in the way the driver does, and the polling example in the reference manual (UG585 v1.10 18.3.7 Read Messages from RxFIFO) also says that either RXOK or RXNEMP can be used for detecting incoming messages. Fix the issue and simplify the RX processing by only using RXNEMP without RXOK. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 51670b322409..727add0decdf 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -100,7 +100,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK) + XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN___SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ @@ -708,15 +708,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) { - if (isr & XCAN_IXR_RXOK_MASK) { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXOK_MASK); - work_done += xcan_rx(ndev); - } else { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXNEMP_MASK); - break; - } + work_done += xcan_rx(ndev); priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK); isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } @@ -727,7 +719,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) if (work_done < quota) { napi_complete(napi); ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK); + ier |= XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); } return work_done; @@ -799,9 +791,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) } /* Check for the type of receive interrupt and Processing it */ - if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) { + if (isr & XCAN_IXR_RXNEMP_MASK) { ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK); + ier &= ~XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); napi_schedule(&priv->napi); } From 7e572a170fc30ce4e6bff79b2a54f445a88457ac Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Wed, 8 Feb 2017 13:13:40 +0200 Subject: [PATCH 125/519] can: xilinx_can: fix recovery from error states not being propagated commit 877e0b75947e2c7acf5624331bb17ceb093c98ae upstream. The xilinx_can driver contains no mechanism for propagating recovery from CAN_STATE_ERROR_WARNING and CAN_STATE_ERROR_PASSIVE. Add such a mechanism by factoring the handling of XCAN_STATE_ERROR_PASSIVE and XCAN_STATE_ERROR_WARNING out of xcan_err_interrupt and checking for recovery after RX and TX if the interface is in one of those states. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 155 ++++++++++++++++++++++++++++------- 1 file changed, 127 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 727add0decdf..b0b52bf276b0 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -2,6 +2,7 @@ * * Copyright (C) 2012 - 2014 Xilinx, Inc. * Copyright (C) 2009 PetaLogix. All rights reserved. + * Copyright (C) 2017 Sandvik Mining and Construction Oy * * Description: * This driver is developed for Axi CAN IP and for Zynq CANPS Controller. @@ -528,6 +529,123 @@ static int xcan_rx(struct net_device *ndev) return 1; } +/** + * xcan_current_error_state - Get current error state from HW + * @ndev: Pointer to net_device structure + * + * Checks the current CAN error state from the HW. Note that this + * only checks for ERROR_PASSIVE and ERROR_WARNING. + * + * Return: + * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE + * otherwise. + */ +static enum can_state xcan_current_error_state(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 status = priv->read_reg(priv, XCAN_SR_OFFSET); + + if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) + return CAN_STATE_ERROR_PASSIVE; + else if (status & XCAN_SR_ERRWRN_MASK) + return CAN_STATE_ERROR_WARNING; + else + return CAN_STATE_ERROR_ACTIVE; +} + +/** + * xcan_set_error_state - Set new CAN error state + * @ndev: Pointer to net_device structure + * @new_state: The new CAN state to be set + * @cf: Error frame to be populated or NULL + * + * Set new CAN error state for the device, updating statistics and + * populating the error frame if given. + */ +static void xcan_set_error_state(struct net_device *ndev, + enum can_state new_state, + struct can_frame *cf) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET); + u32 txerr = ecr & XCAN_ECR_TEC_MASK; + u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT; + + priv->can.state = new_state; + + if (cf) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[6] = txerr; + cf->data[7] = rxerr; + } + + switch (new_state) { + case CAN_STATE_ERROR_PASSIVE: + priv->can.can_stats.error_passive++; + if (cf) + cf->data[1] = (rxerr > 127) ? + CAN_ERR_CRTL_RX_PASSIVE : + CAN_ERR_CRTL_TX_PASSIVE; + break; + case CAN_STATE_ERROR_WARNING: + priv->can.can_stats.error_warning++; + if (cf) + cf->data[1] |= (txerr > rxerr) ? + CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + break; + case CAN_STATE_ERROR_ACTIVE: + if (cf) + cf->data[1] |= CAN_ERR_CRTL_ACTIVE; + break; + default: + /* non-ERROR states are handled elsewhere */ + WARN_ON(1); + break; + } +} + +/** + * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX + * @ndev: Pointer to net_device structure + * + * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if + * the performed RX/TX has caused it to drop to a lesser state and set + * the interface state accordingly. + */ +static void xcan_update_error_state_after_rxtx(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + enum can_state old_state = priv->can.state; + enum can_state new_state; + + /* changing error state due to successful frame RX/TX can only + * occur from these states + */ + if (old_state != CAN_STATE_ERROR_WARNING && + old_state != CAN_STATE_ERROR_PASSIVE) + return; + + new_state = xcan_current_error_state(ndev); + + if (new_state != old_state) { + struct sk_buff *skb; + struct can_frame *cf; + + skb = alloc_can_err_skb(ndev, &cf); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); + + if (skb) { + struct net_device_stats *stats = &ndev->stats; + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_rx(skb); + } + } +} + /** * xcan_err_interrupt - error frame Isr * @ndev: net_device pointer @@ -543,16 +661,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; - u32 err_status, status, txerr = 0, rxerr = 0; + u32 err_status; skb = alloc_can_err_skb(ndev, &cf); err_status = priv->read_reg(priv, XCAN_ESR_OFFSET); priv->write_reg(priv, XCAN_ESR_OFFSET, err_status); - txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK; - rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) & - XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT); - status = priv->read_reg(priv, XCAN_SR_OFFSET); if (isr & XCAN_IXR_BSOFF_MASK) { priv->can.state = CAN_STATE_BUS_OFF; @@ -562,28 +676,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) can_bus_off(ndev); if (skb) cf->can_id |= CAN_ERR_BUSOFF; - } else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - priv->can.can_stats.error_passive++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (rxerr > 127) ? - CAN_ERR_CRTL_RX_PASSIVE : - CAN_ERR_CRTL_TX_PASSIVE; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - priv->can.can_stats.error_warning++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] |= (txerr > rxerr) ? - CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } + } else { + enum can_state new_state = xcan_current_error_state(ndev); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); } /* Check for Arbitration lost interrupt */ @@ -713,8 +809,10 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } - if (work_done) + if (work_done) { can_led_event(ndev, CAN_LED_EVENT_RX); + xcan_update_error_state_after_rxtx(ndev); + } if (work_done < quota) { napi_complete(napi); @@ -745,6 +843,7 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } can_led_event(ndev, CAN_LED_EVENT_TX); + xcan_update_error_state_after_rxtx(ndev); netif_wake_queue(ndev); } From d7dec444472632fcba1d4b3be0aac8d279a37cc4 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 7 Feb 2017 13:23:04 +0200 Subject: [PATCH 126/519] can: xilinx_can: fix device dropping off bus on RX overrun commit 2574fe54515ed3487405de329e4e9f13d7098c10 upstream. The xilinx_can driver performs a software reset when an RX overrun is detected. This causes the device to enter Configuration mode where no messages are received or transmitted. The documentation does not mention any need to perform a reset on an RX overrun, and testing by inducing an RX overflow also indicated that the device continues to work just fine without a reset. Remove the software reset. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index b0b52bf276b0..e787116e9e82 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -695,7 +695,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) if (isr & XCAN_IXR_RXOFLW_MASK) { stats->rx_over_errors++; stats->rx_errors++; - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); if (skb) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; From e5df9264649e13f0b6c2da9ce089a8ac41ef5121 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 23 Feb 2017 14:50:03 +0200 Subject: [PATCH 127/519] can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting commit 620050d9c2be15c47017ba95efe59e0832e99a56 upstream. The xilinx_can driver assumes that the TXOK interrupt only clears after it has been acknowledged as many times as there have been successfully sent frames. However, the documentation does not mention such behavior, instead saying just that the interrupt is cleared when the clear bit is set. Similarly, testing seems to also suggest that it is immediately cleared regardless of the amount of frames having been sent. Performing some heavy TX load and then going back to idle has the tx_head drifting further away from tx_tail over time, steadily reducing the amount of frames the driver keeps in the TX FIFO (but not to zero, as the TXOK interrupt always frees up space for 1 frame from the driver's perspective, so frames continue to be sent) and delaying the local echo frames. The TX FIFO tracking is also otherwise buggy as it does not account for TX FIFO being cleared after software resets, causing BUG!, TX FIFO full when queue awake! messages to be output. There does not seem to be any way to accurately track the state of the TX FIFO for local echo support while using the full TX FIFO. The Zynq version of the HW (but not the soft-AXI version) has watermark programming support and with it an additional TX-FIFO-empty interrupt bit. Modify the driver to only put 1 frame into TX FIFO at a time on soft-AXI and 2 frames at a time on Zynq. On Zynq the TXFEMP interrupt bit is used to detect whether 1 or 2 frames have been sent at interrupt processing time. Tested with the integrated CAN on Zynq-7000 SoC. The 1-frame-FIFO mode was also tested. An alternative way to solve this would be to drop local echo support but keep using the full TX FIFO. v2: Add FIFO space check before TX queue wake with locking to synchronize with queue stop. This avoids waking the queue when xmit() had just filled it. v3: Keep local echo support and reduce the amount of frames in FIFO instead as suggested by Marc Kleine-Budde. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 139 +++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index e787116e9e82..09c9950a03a5 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -26,8 +26,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -118,6 +120,7 @@ enum xcan_reg { /** * struct xcan_priv - This definition define CAN driver instance * @can: CAN private data structure. + * @tx_lock: Lock for synchronizing TX interrupt handling * @tx_head: Tx CAN packets ready to send on the queue * @tx_tail: Tx CAN packets successfully sended on the queue * @tx_max: Maximum number packets the driver can send @@ -132,6 +135,7 @@ enum xcan_reg { */ struct xcan_priv { struct can_priv can; + spinlock_t tx_lock; unsigned int tx_head; unsigned int tx_tail; unsigned int tx_max; @@ -159,6 +163,11 @@ static const struct can_bittiming_const xcan_bittiming_const = { .brp_inc = 1, }; +#define XCAN_CAP_WATERMARK 0x0001 +struct xcan_devtype_data { + unsigned int caps; +}; + /** * xcan_write_reg_le - Write a value to the device register little endian * @priv: Driver private data structure @@ -238,6 +247,10 @@ static int set_reset_mode(struct net_device *ndev) usleep_range(500, 10000); } + /* reset clears FIFOs */ + priv->tx_head = 0; + priv->tx_tail = 0; + return 0; } @@ -392,6 +405,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf = (struct can_frame *)skb->data; u32 id, dlc, data[2] = {0, 0}; + unsigned long flags; if (can_dropped_invalid_skb(ndev, skb)) return NETDEV_TX_OK; @@ -439,6 +453,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) data[1] = be32_to_cpup((__be32 *)(cf->data + 4)); can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max); + + spin_lock_irqsave(&priv->tx_lock, flags); + priv->tx_head++; /* Write the Frame to Xilinx CAN TX FIFO */ @@ -454,10 +471,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) stats->tx_bytes += cf->can_dlc; } + /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */ + if (priv->tx_max > 1) + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK); + /* Check if the TX buffer is full */ if ((priv->tx_head - priv->tx_tail) == priv->tx_max) netif_stop_queue(ndev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return NETDEV_TX_OK; } @@ -831,19 +854,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) { struct xcan_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; + unsigned int frames_in_fifo; + int frames_sent = 1; /* TXOK => at least 1 frame was sent */ + unsigned long flags; + int retries = 0; - while ((priv->tx_head - priv->tx_tail > 0) && - (isr & XCAN_IXR_TXOK_MASK)) { + /* Synchronize with xmit as we need to know the exact number + * of frames in the FIFO to stay in sync due to the TXFEMP + * handling. + * This also prevents a race between netif_wake_queue() and + * netif_stop_queue(). + */ + spin_lock_irqsave(&priv->tx_lock, flags); + + frames_in_fifo = priv->tx_head - priv->tx_tail; + + if (WARN_ON_ONCE(frames_in_fifo == 0)) { + /* clear TXOK anyway to avoid getting back here */ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return; + } + + /* Check if 2 frames were sent (TXOK only means that at least 1 + * frame was sent). + */ + if (frames_in_fifo > 1) { + WARN_ON(frames_in_fifo > priv->tx_max); + + /* Synchronize TXOK and isr so that after the loop: + * (1) isr variable is up-to-date at least up to TXOK clear + * time. This avoids us clearing a TXOK of a second frame + * but not noticing that the FIFO is now empty and thus + * marking only a single frame as sent. + * (2) No TXOK is left. Having one could mean leaving a + * stray TXOK as we might process the associated frame + * via TXFEMP handling as we read TXFEMP *after* TXOK + * clear to satisfy (1). + */ + while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) { + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + isr = priv->read_reg(priv, XCAN_ISR_OFFSET); + } + + if (isr & XCAN_IXR_TXFEMP_MASK) { + /* nothing in FIFO anymore */ + frames_sent = frames_in_fifo; + } + } else { + /* single frame in fifo, just clear TXOK */ + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + } + + while (frames_sent--) { can_get_echo_skb(ndev, priv->tx_tail % priv->tx_max); priv->tx_tail++; stats->tx_packets++; - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } + + netif_wake_queue(ndev); + + spin_unlock_irqrestore(&priv->tx_lock, flags); + can_led_event(ndev, CAN_LED_EVENT_TX); xcan_update_error_state_after_rxtx(ndev); - netif_wake_queue(ndev); } /** @@ -1120,6 +1195,18 @@ static int __maybe_unused xcan_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume); +static const struct xcan_devtype_data xcan_zynq_data = { + .caps = XCAN_CAP_WATERMARK, +}; + +/* Match table for OF platform binding */ +static const struct of_device_id xcan_of_match[] = { + { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data }, + { .compatible = "xlnx,axi-can-1.00.a", }, + { /* end of list */ }, +}; +MODULE_DEVICE_TABLE(of, xcan_of_match); + /** * xcan_probe - Platform registration call * @pdev: Handle to the platform device structure @@ -1134,8 +1221,10 @@ static int xcan_probe(struct platform_device *pdev) struct resource *res; /* IO mem resources */ struct net_device *ndev; struct xcan_priv *priv; + const struct of_device_id *of_id; + int caps = 0; void __iomem *addr; - int ret, rx_max, tx_max; + int ret, rx_max, tx_max, tx_fifo_depth; /* Get the virtual base address for the device */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1145,7 +1234,8 @@ static int xcan_probe(struct platform_device *pdev) goto err; } - ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max); + ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", + &tx_fifo_depth); if (ret < 0) goto err; @@ -1153,6 +1243,30 @@ static int xcan_probe(struct platform_device *pdev) if (ret < 0) goto err; + of_id = of_match_device(xcan_of_match, &pdev->dev); + if (of_id) { + const struct xcan_devtype_data *devtype_data = of_id->data; + + if (devtype_data) + caps = devtype_data->caps; + } + + /* There is no way to directly figure out how many frames have been + * sent when the TXOK interrupt is processed. If watermark programming + * is supported, we can have 2 frames in the FIFO and use TXFEMP + * to determine if 1 or 2 frames have been sent. + * Theoretically we should be able to use TXFWMEMP to determine up + * to 3 frames, but it seems that after putting a second frame in the + * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less + * than 2 frames in FIFO) is set anyway with no TXOK (a frame was + * sent), which is not a sensible state - possibly TXFWMEMP is not + * completely synchronized with the rest of the bits? + */ + if (caps & XCAN_CAP_WATERMARK) + tx_max = min(tx_fifo_depth, 2); + else + tx_max = 1; + /* Create a CAN device instance */ ndev = alloc_candev(sizeof(struct xcan_priv), tx_max); if (!ndev) @@ -1167,6 +1281,7 @@ static int xcan_probe(struct platform_device *pdev) CAN_CTRLMODE_BERR_REPORTING; priv->reg_base = addr; priv->tx_max = tx_max; + spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ ndev->irq = platform_get_irq(pdev, 0); @@ -1234,9 +1349,9 @@ static int xcan_probe(struct platform_device *pdev) devm_can_led_init(ndev); clk_disable_unprepare(priv->bus_clk); clk_disable_unprepare(priv->can_clk); - netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n", + netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n", priv->reg_base, ndev->irq, priv->can.clock.freq, - priv->tx_max); + tx_fifo_depth, priv->tx_max); return 0; @@ -1272,14 +1387,6 @@ static int xcan_remove(struct platform_device *pdev) return 0; } -/* Match table for OF platform binding */ -static const struct of_device_id xcan_of_match[] = { - { .compatible = "xlnx,zynq-can-1.0", }, - { .compatible = "xlnx,axi-can-1.00.a", }, - { /* end of list */ }, -}; -MODULE_DEVICE_TABLE(of, xcan_of_match); - static struct platform_driver xcan_driver = { .probe = xcan_probe, .remove = xcan_remove, From f7efacee6f8aaa311726eed09470e970b59fd4e1 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 26 Feb 2018 14:39:59 +0200 Subject: [PATCH 128/519] can: xilinx_can: fix incorrect clear of non-processed interrupts commit 2f4f0f338cf453bfcdbcf089e177c16f35f023c8 upstream. xcan_interrupt() clears ERROR|RXOFLV|BSOFF|ARBLST interrupts if any of them is asserted. This does not take into account that some of them could have been asserted between interrupt status read and interrupt clear, therefore clearing them without handling them. Fix the code to only clear those interrupts that it knows are asserted and therefore going to be processed in xcan_err_interrupt(). Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Michal Simek Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 09c9950a03a5..a7286d72ed7b 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -937,6 +937,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) struct net_device *ndev = (struct net_device *)dev_id; struct xcan_priv *priv = netdev_priv(ndev); u32 isr, ier; + u32 isr_errors; /* Get the interrupt status from Xilinx CAN */ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); @@ -955,11 +956,10 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) xcan_tx_interrupt(ndev, isr); /* Check for the type of error interrupt and Processing it */ - if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | - XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) { - priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK | - XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK | - XCAN_IXR_ARBLST_MASK)); + isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | + XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK); + if (isr_errors) { + priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors); xcan_err_interrupt(ndev, isr); } From 264dc1588925388709eeddd51901042ca277c828 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 26 Feb 2018 14:27:13 +0200 Subject: [PATCH 129/519] can: xilinx_can: fix RX overflow interrupt not being enabled commit 83997997252f5d3fc7f04abc24a89600c2b504ab upstream. RX overflow interrupt (RXOFLW) is disabled even though xcan_interrupt() processes it. This means that an RX overflow interrupt will only be processed when another interrupt gets asserted (e.g. for RX/TX). Fix that by enabling the RXOFLW interrupt. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula Cc: Michal Simek Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/xilinx_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index a7286d72ed7b..700b98d9c250 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -103,7 +103,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK) + XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN___SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ From d41d0fe374d4fc358cfdae80fd39fd98174d3da3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Jul 2018 10:13:22 +0200 Subject: [PATCH 130/519] turn off -Wattribute-alias Starting with gcc-8.1, we get a warning about all system call definitions, which use an alias between functions with incompatible prototypes, e.g.: In file included from ../mm/process_vm_access.c:19: ../include/linux/syscalls.h:211:18: warning: 'sys_process_vm_readv' alias between functions of incompatible types 'long int(pid_t, const struct iovec *, long unsigned int, const struct iovec *, long unsigned int, long unsigned int)' {aka 'long int(int, const struct iovec *, long unsigned int, const struct iovec *, long unsigned int, long unsigned int)'} and 'long int(long int, long int, long int, long int, long int, long int)' [-Wattribute-alias] asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ ^~~ ../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ^~~~~~~~~~~~~~~~~ ../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx' #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~ ../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6' SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec, ^~~~~~~~~~~~~~~ ../include/linux/syscalls.h:215:18: note: aliased declaration here asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ ^~~ ../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ^~~~~~~~~~~~~~~~~ ../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx' #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~ ../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6' SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec, This is really noisy and does not indicate a real problem. In the latest mainline kernel, this was addressed by commit bee20031772a ("disable -Wattribute-alias warning for SYSCALL_DEFINEx()"), which seems too invasive to backport. This takes a much simpler approach and just disables the warning across the kernel. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 63f3e2438a26..bee96eda883a 100644 --- a/Makefile +++ b/Makefile @@ -624,6 +624,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) +KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os From 0ca85fc310e8c24cba10ed241a0188795e177683 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Jul 2018 10:13:23 +0200 Subject: [PATCH 131/519] ARM: fix put_user() for gcc-8 Building kernels before linux-4.7 with gcc-8 results in many build failures when gcc triggers a check that was meant to catch broken compilers: /tmp/ccCGMQmS.s:648: Error: .err encountered According to the discussion in the gcc bugzilla, a local "register asm()" variable is still supposed to be the correct way to force an inline assembly to use a particular register, but marking it 'const' lets the compiler do optimizations that break that, i.e the compiler is free to treat the variable as either 'const' or 'register' in that case. Upstream commit 9f73bd8bb445 ("ARM: uaccess: remove put_user() code duplication") fixed this problem in linux-4.8 as part of a larger change, but seems a little too big to be backported to 4.4. Let's take the simplest fix and change only the one broken line in the same way as newer kernels. Suggested-by: Bernd Edlinger Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85745 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86673 Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 35c9db857ebe..cd8b589111ba 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -251,7 +251,7 @@ extern int __put_user_8(void *, unsigned long long); ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ const typeof(*(p)) __user *__tmp_p = (p); \ - register const typeof(*(p)) __r2 asm("r2") = (x); \ + register typeof(*(p)) __r2 asm("r2") = (x); \ register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ From ac15b2b23808ee2c3264329b035a8f0f7d7f50e6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 28 Jul 2018 07:45:05 +0200 Subject: [PATCH 132/519] Linux 4.4.145 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bee96eda883a..be31491a2d67 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 144 +SUBLEVEL = 145 EXTRAVERSION = NAME = Blurry Fish Butt From 97e066123dc69bd3bca36794a0f75989e97a04fe Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 12 Jul 2018 09:33:04 -0700 Subject: [PATCH 133/519] MIPS: Fix off-by-one in pci_resource_to_user() commit 38c0a74fe06da3be133cae3fb7bde6a9438e698b upstream. The MIPS implementation of pci_resource_to_user() introduced in v3.12 by commit 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly") incorrectly sets *end to the address of the byte after the resource, rather than the last byte of the resource. This results in userland seeing resources as a byte larger than they actually are, for example a 32 byte BAR will be reported by a tool such as lspci as being 33 bytes in size: Region 2: I/O ports at 1000 [disabled] [size=33] Correct this by subtracting one from the calculated end address, reporting the correct address to userland. Signed-off-by: Paul Burton Reported-by: Rui Wang Fixes: 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly") Cc: James Hogan Cc: Ralf Baechle Cc: Wolfgang Grandegger Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v3.12+ Patchwork: https://patchwork.linux-mips.org/patch/19829/ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 98c31e5d9579..a7bc901819c8 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -89,7 +89,7 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, phys_addr_t size = resource_size(rsrc); *start = fixup_bigphys_addr(rsrc->start, size); - *end = rsrc->start + size; + *end = rsrc->start + size - 1; } /* From 1e2e0c65b5fcb13328fa4387663148040ae28d5a Mon Sep 17 00:00:00 2001 From: Donald Shanty III Date: Wed, 4 Jul 2018 15:50:47 +0000 Subject: [PATCH 134/519] Input: elan_i2c - add ACPI ID for lenovo ideapad 330 commit 938f45008d8bc391593c97508bc798cc95a52b9b upstream. This allows Elan driver to bind to the touchpad found in Lenovo Ideapad 330 series laptops. Signed-off-by: Donald Shanty III Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 97f6e05cffce..a7515f5dd76c 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1251,6 +1251,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061D", 0 }, { "ELAN1000", 0 }, { } }; From 8fe1c0b7af711ff6b2e5ed17edb3128b5b66d924 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 18 Jul 2018 17:24:35 +0000 Subject: [PATCH 135/519] Input: i8042 - add Lenovo LaVie Z to the i8042 reset list commit 384cf4285b34e08917e3e66603382f2b0c4f6e1b upstream. The Lenovo LaVie Z laptop requires i8042 to be reset in order to consistently detect its Elantech touchpad. The nomux and kbdreset quirks are not sufficient. It's possible the other LaVie Z models from NEC require this as well. Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index e484ea2dc787..34be09651ee8 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -527,6 +527,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"), }, }, + { + /* Lenovo LaVie Z */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"), + }, + }, { } }; From 0179b1b364243aca8001ef1fbd51a1d4349547c5 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Mon, 16 Jul 2018 12:10:03 +0000 Subject: [PATCH 136/519] Input: elan_i2c - add another ACPI ID for Lenovo Ideapad 330-15AST commit 6f88a6439da5d94de334a341503bc2c7f4a7ea7f upstream. Add ELAN0622 to ACPI mapping table to support Elan touchpad found in Ideapad 330-15AST. Signed-off-by: KT Liao Reported-by: Anant Shende Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index a7515f5dd76c..a716482774db 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1252,6 +1252,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0612", 0 }, { "ELAN0618", 0 }, { "ELAN061D", 0 }, + { "ELAN0622", 0 }, { "ELAN1000", 0 }, { } }; From b80091b0113c931894c87b034048f8677a53fe9f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 24 Jul 2018 19:13:31 -0400 Subject: [PATCH 137/519] tracing: Fix double free of event_trigger_data commit 1863c387259b629e4ebfb255495f67cd06aa229b upstream. Running the following: # cd /sys/kernel/debug/tracing # echo 500000 > buffer_size_kb [ Or some other number that takes up most of memory ] # echo snapshot > events/sched/sched_switch/trigger Triggers the following bug: ------------[ cut here ]------------ kernel BUG at mm/slub.c:296! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI CPU: 6 PID: 6878 Comm: bash Not tainted 4.18.0-rc6-test+ #1066 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:kfree+0x16c/0x180 Code: 05 41 0f b6 72 51 5b 5d 41 5c 4c 89 d7 e9 ac b3 f8 ff 48 89 d9 48 89 da 41 b8 01 00 00 00 5b 5d 41 5c 4c 89 d6 e9 f4 f3 ff ff <0f> 0b 0f 0b 48 8b 3d d9 d8 f9 00 e9 c1 fe ff ff 0f 1f 40 00 0f 1f RSP: 0018:ffffb654436d3d88 EFLAGS: 00010246 RAX: ffff91a9d50f3d80 RBX: ffff91a9d50f3d80 RCX: ffff91a9d50f3d80 RDX: 00000000000006a4 RSI: ffff91a9de5a60e0 RDI: ffff91a9d9803500 RBP: ffffffff8d267c80 R08: 00000000000260e0 R09: ffffffff8c1a56be R10: fffff0d404543cc0 R11: 0000000000000389 R12: ffffffff8c1a56be R13: ffff91a9d9930e18 R14: ffff91a98c0c2890 R15: ffffffff8d267d00 FS: 00007f363ea64700(0000) GS:ffff91a9de580000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c1cacc8e10 CR3: 00000000d9b46003 CR4: 00000000001606e0 Call Trace: event_trigger_callback+0xee/0x1d0 event_trigger_write+0xfc/0x1a0 __vfs_write+0x33/0x190 ? handle_mm_fault+0x115/0x230 ? _cond_resched+0x16/0x40 vfs_write+0xb0/0x190 ksys_write+0x52/0xc0 do_syscall_64+0x5a/0x160 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f363e16ab50 Code: 73 01 c3 48 8b 0d 38 83 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 79 db 2c 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 1e e3 01 00 48 89 04 24 RSP: 002b:00007fff9a4c6378 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f363e16ab50 RDX: 0000000000000009 RSI: 000055c1cacc8e10 RDI: 0000000000000001 RBP: 000055c1cacc8e10 R08: 00007f363e435740 R09: 00007f363ea64700 R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000009 R13: 0000000000000001 R14: 00007f363e4345e0 R15: 00007f363e4303c0 Modules linked in: ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device i915 snd_pcm snd_timer i2c_i801 snd soundcore i2c_algo_bit drm_kms_helper 86_pkg_temp_thermal video kvm_intel kvm irqbypass wmi e1000e ---[ end trace d301afa879ddfa25 ]--- The cause is because the register_snapshot_trigger() call failed to allocate the snapshot buffer, and then called unregister_trigger() which freed the data that was passed to it. Then on return to the function that called register_snapshot_trigger(), as it sees it failed to register, it frees the trigger_data again and causes a double free. By calling event_trigger_init() on the trigger_data (which only ups the reference counter for it), and then event_trigger_free() afterward, the trigger_data would not get freed by the registering trigger function as it would only up and lower the ref count for it. If the register trigger function fails, then the event_trigger_free() called after it will free the trigger data normally. Link: http://lkml.kernel.org/r/20180724191331.738eb819@gandalf.local.home Cc: stable@vger.kerne.org Fixes: 93e31ffbf417 ("tracing: Add 'snapshot' event trigger command") Reported-by: Masami Hiramatsu Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 2172dd61577e..9d9cacf07ed4 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -663,6 +663,8 @@ event_trigger_callback(struct event_command *cmd_ops, goto out_free; out_reg: + /* Up the trigger_data count to make sure reg doesn't free it on failure */ + event_trigger_init(trigger_ops, trigger_data); ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); /* * The above returns on success the # of functions enabled, @@ -670,11 +672,13 @@ event_trigger_callback(struct event_command *cmd_ops, * Consider no functions a failure too. */ if (!ret) { + cmd_ops->unreg(glob, trigger_ops, trigger_data, file); ret = -ENOENT; - goto out_free; - } else if (ret < 0) - goto out_free; - ret = 0; + } else if (ret > 0) + ret = 0; + + /* Down the counter of trigger_data or free it if not used anymore */ + event_trigger_free(trigger_ops, trigger_data); out: return ret; From 1a7f63b2a0911df09b4939995e3fe5daeb9753ef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 25 Jul 2018 16:02:06 -0400 Subject: [PATCH 138/519] tracing: Fix possible double free in event_enable_trigger_func() commit 15cc78644d0075e76d59476a4467e7143860f660 upstream. There was a case that triggered a double free in event_trigger_callback() due to the called reg() function freeing the trigger_data and then it getting freed again by the error return by the caller. The solution there was to up the trigger_data ref count. Code inspection found that event_enable_trigger_func() has the same issue, but is not as easy to trigger (requires harder to trigger failures). It needs to be solved slightly different as it needs more to clean up when the reg() function fails. Link: http://lkml.kernel.org/r/20180725124008.7008e586@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 7862ad1846e99 ("tracing: Add 'enable_event' and 'disable_event' event trigger commands") Reivewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 9d9cacf07ed4..b8a894adab2c 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1231,6 +1231,9 @@ event_enable_trigger_func(struct event_command *cmd_ops, goto out; } + /* Up the trigger_data count to make sure nothing frees it on failure */ + event_trigger_init(trigger_ops, trigger_data); + if (trigger) { number = strsep(&trigger, ":"); @@ -1281,6 +1284,7 @@ event_enable_trigger_func(struct event_command *cmd_ops, goto out_disable; /* Just return zero, not the number of enabled functions */ ret = 0; + event_trigger_free(trigger_ops, trigger_data); out: return ret; @@ -1291,7 +1295,7 @@ event_enable_trigger_func(struct event_command *cmd_ops, out_free: if (cmd_ops->set_filter) cmd_ops->set_filter(NULL, trigger_data, NULL); - kfree(trigger_data); + event_trigger_free(trigger_ops, trigger_data); kfree(enable_data); goto out; } From d9bbd8076836b1c3e648132afe73185f945b89a7 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 25 Jul 2018 16:20:38 +0200 Subject: [PATCH 139/519] tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure commit 57ea2a34adf40f3a6e88409aafcf803b8945619a upstream. If enable_trace_kprobe fails to enable the probe in enable_k(ret)probe it returns an error, but does not unset the tp flags it set previously. This results in a probe being considered enabled and failures like being unable to remove the probe through kprobe_events file since probes_open() expects every probe to be disabled. Link: http://lkml.kernel.org/r/20180725102826.8300-1-asavkov@redhat.com Link: http://lkml.kernel.org/r/20180725142038.4765-1-asavkov@redhat.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 41a7dd420c57 ("tracing/kprobes: Support ftrace_event_file base multibuffer") Acked-by: Masami Hiramatsu Reviewed-by: Josh Poimboeuf Signed-off-by: Artem Savkov Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f2682799c215..a357d7e93934 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -349,11 +349,10 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, static int enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { + struct event_file_link *link; int ret = 0; if (file) { - struct event_file_link *link; - link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) { ret = -ENOMEM; @@ -373,6 +372,16 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) else ret = enable_kprobe(&tk->rp.kp); } + + if (ret) { + if (file) { + list_del_rcu(&link->list); + kfree(link); + tk->tp.flags &= ~TP_FLAG_TRACE; + } else { + tk->tp.flags &= ~TP_FLAG_PROFILE; + } + } out: return ret; } From dab04fda26530ee686b14b02aa2c5e716cef153c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 25 Jul 2018 22:28:56 -0400 Subject: [PATCH 140/519] tracing: Quiet gcc warning about maybe unused link variable commit 2519c1bbe38d7acacc9aacba303ca6f97482ed53 upstream. Commit 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure") added an if statement that depends on another if statement that gcc doesn't see will initialize the "link" variable and gives the warning: "warning: 'link' may be used uninitialized in this function" It is really a false positive, but to quiet the warning, and also to make sure that it never actually is used uninitialized, initialize the "link" variable to NULL and add an if (!WARN_ON_ONCE(!link)) where the compiler thinks it could be used uninitialized. Cc: stable@vger.kernel.org Fixes: 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure") Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a357d7e93934..f0ee722be520 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -349,7 +349,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, static int enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { - struct event_file_link *link; + struct event_file_link *link = NULL; int ret = 0; if (file) { @@ -375,7 +375,9 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) if (ret) { if (file) { - list_del_rcu(&link->list); + /* Notice the if is true on not WARN() */ + if (!WARN_ON_ONCE(!link)) + list_del_rcu(&link->list); kfree(link); tk->tp.flags &= ~TP_FLAG_TRACE; } else { From 2670ecf4711430d9ee263cad666b6ea2034952d5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 12 Jun 2018 08:57:53 +0200 Subject: [PATCH 141/519] xen/netfront: raise max number of slots in xennet_get_responses() [ Upstream commit 57f230ab04d2910a06d17d988f1c4d7586a59113 ] The max number of slots used in xennet_get_responses() is set to MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD). In old kernel-xen MAX_SKB_FRAGS was 18, while nowadays it is 17. This difference is resulting in frequent messages "too many slots" and a reduced network throughput for some workloads (factor 10 below that of a kernel-xen based guest). Replacing MAX_SKB_FRAGS by XEN_NETIF_NR_SLOTS_MIN for calculation of the max number of slots to use solves that problem (tests showed no more messages "too many slots" and throughput was as high as with the kernel-xen based guest system). Replace MAX_SKB_FRAGS-2 by XEN_NETIF_NR_SLOTS_MIN-1 in netfront_tx_slot_available() for making it clearer what is really being tested without actually modifying the tested value. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a0de2453fa09..3bb3d6d9117c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -238,7 +238,7 @@ static void rx_refill_timeout(unsigned long data) static int netfront_tx_slot_available(struct netfront_queue *queue) { return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) < - (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2); + (NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1); } static void xennet_maybe_wake_tx(struct netfront_queue *queue) @@ -775,7 +775,7 @@ static int xennet_get_responses(struct netfront_queue *queue, RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *skb = xennet_get_rx_skb(queue, cons); grant_ref_t ref = xennet_get_rx_ref(queue, cons); - int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); + int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD); int slots = 1; int err = 0; unsigned long ret; From 55e526aa96db8a70b69524cbdbb8168a20e4ea38 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Mon, 11 Jun 2018 16:18:40 +0800 Subject: [PATCH 142/519] ALSA: emu10k1: add error handling for snd_ctl_add [ Upstream commit 6d531e7b972cb62ded011c2dfcc2d9f72ea6c421 ] When snd_ctl_add fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling snd_ctl_add. Signed-off-by: Zhouyang Jia Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/emupcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 14a305bd8a98..72e442d86bb1 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1850,7 +1850,9 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device) if (!kctl) return -ENOMEM; kctl->id.device = device; - snd_ctl_add(emu->card, kctl); + err = snd_ctl_add(emu->card, kctl); + if (err < 0) + return err; snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(emu->pci), 64*1024, 64*1024); From 8edf67eda05a8aa9d1e1f0d8c3c9f98cd1cb93f7 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Mon, 11 Jun 2018 16:04:06 +0800 Subject: [PATCH 143/519] ALSA: fm801: add error handling for snd_ctl_add [ Upstream commit ef1ffbe7889e99f5b5cccb41c89e5c94f50f3218 ] When snd_ctl_add fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling snd_ctl_add. Signed-off-by: Zhouyang Jia Acked-by: Andy Shevchenko Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/fm801.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 1fdd92b6f18f..d6e89a6d0bb9 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1050,11 +1050,19 @@ static int snd_fm801_mixer(struct fm801 *chip) if ((err = snd_ac97_mixer(chip->ac97_bus, &ac97, &chip->ac97_sec)) < 0) return err; } - for (i = 0; i < FM801_CONTROLS; i++) - snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls[i], chip)); + for (i = 0; i < FM801_CONTROLS; i++) { + err = snd_ctl_add(chip->card, + snd_ctl_new1(&snd_fm801_controls[i], chip)); + if (err < 0) + return err; + } if (chip->multichannel) { - for (i = 0; i < FM801_CONTROLS_MULTI; i++) - snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls_multi[i], chip)); + for (i = 0; i < FM801_CONTROLS_MULTI; i++) { + err = snd_ctl_add(chip->card, + snd_ctl_new1(&snd_fm801_controls_multi[i], chip)); + if (err < 0) + return err; + } } return 0; } From e6960af9566e9aeee3db07ac2d6a4f70f5731948 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 8 Jun 2018 16:31:46 -0400 Subject: [PATCH 144/519] nfsd: fix potential use-after-free in nfsd4_decode_getdeviceinfo [ Upstream commit 3171822fdcdd6e6d536047c425af6dc7a92dc585 ] When running a fuzz tester against a KASAN-enabled kernel, the following splat periodically occurs. The problem occurs when the test sends a GETDEVICEINFO request with a malformed xdr array (size but no data) for gdia_notify_types and the array size is > 0x3fffffff, which results in an overflow in the value of nbytes which is passed to read_buf(). If the array size is 0x40000000, 0x80000000, or 0xc0000000, then after the overflow occurs, the value of nbytes 0, and when that happens the pointer returned by read_buf() points to the end of the xdr data (i.e. argp->end) when really it should be returning NULL. Fix this by returning NFS4ERR_BAD_XDR if the array size is > 1000 (this value is arbitrary, but it's the same threshold used by nfsd4_decode_bitmap()... in could really be any value >= 1 since it's expected to get at most a single bitmap in gdia_notify_types). [ 119.256854] ================================================================== [ 119.257611] BUG: KASAN: use-after-free in nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd] [ 119.258422] Read of size 4 at addr ffff880113ada000 by task nfsd/538 [ 119.259146] CPU: 0 PID: 538 Comm: nfsd Not tainted 4.17.0+ #1 [ 119.259662] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 [ 119.261202] Call Trace: [ 119.262265] dump_stack+0x71/0xab [ 119.263371] print_address_description+0x6a/0x270 [ 119.264609] kasan_report+0x258/0x380 [ 119.265854] ? nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd] [ 119.267291] nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd] [ 119.268549] ? nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd] [ 119.269873] ? nfsd4_decode_sequence+0x490/0x490 [nfsd] [ 119.271095] nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd] [ 119.272393] ? nfsd4_release_compoundargs+0x1b0/0x1b0 [nfsd] [ 119.273658] nfsd_dispatch+0x183/0x850 [nfsd] [ 119.274918] svc_process+0x161c/0x31a0 [sunrpc] [ 119.276172] ? svc_printk+0x190/0x190 [sunrpc] [ 119.277386] ? svc_xprt_release+0x451/0x680 [sunrpc] [ 119.278622] nfsd+0x2b9/0x430 [nfsd] [ 119.279771] ? nfsd_destroy+0x1c0/0x1c0 [nfsd] [ 119.281157] kthread+0x2db/0x390 [ 119.282347] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 119.283756] ret_from_fork+0x35/0x40 [ 119.286041] Allocated by task 436: [ 119.287525] kasan_kmalloc+0xa0/0xd0 [ 119.288685] kmem_cache_alloc+0xe9/0x1f0 [ 119.289900] get_empty_filp+0x7b/0x410 [ 119.291037] path_openat+0xca/0x4220 [ 119.292242] do_filp_open+0x182/0x280 [ 119.293411] do_sys_open+0x216/0x360 [ 119.294555] do_syscall_64+0xa0/0x2f0 [ 119.295721] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 119.298068] Freed by task 436: [ 119.299271] __kasan_slab_free+0x130/0x180 [ 119.300557] kmem_cache_free+0x78/0x210 [ 119.301823] rcu_process_callbacks+0x35b/0xbd0 [ 119.303162] __do_softirq+0x192/0x5ea [ 119.305443] The buggy address belongs to the object at ffff880113ada000 which belongs to the cache filp of size 256 [ 119.308556] The buggy address is located 0 bytes inside of 256-byte region [ffff880113ada000, ffff880113ada100) [ 119.311376] The buggy address belongs to the page: [ 119.312728] page:ffffea00044eb680 count:1 mapcount:0 mapping:0000000000000000 index:0xffff880113ada780 [ 119.314428] flags: 0x17ffe000000100(slab) [ 119.315740] raw: 0017ffe000000100 0000000000000000 ffff880113ada780 00000001000c0001 [ 119.317379] raw: ffffea0004553c60 ffffea00045c11e0 ffff88011b167e00 0000000000000000 [ 119.319050] page dumped because: kasan: bad access detected [ 119.321652] Memory state around the buggy address: [ 119.322993] ffff880113ad9f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 119.324515] ffff880113ad9f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 119.326087] >ffff880113ada000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 119.327547] ^ [ 119.328730] ffff880113ada080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 119.330218] ffff880113ada100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 119.331740] ================================================================== Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 57e3262ec57a..ee0da259a3d3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1538,6 +1538,8 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, gdev->gd_maxcount = be32_to_cpup(p++); num = be32_to_cpup(p++); if (num) { + if (num > 1000) + goto xdr_error; READ_BUF(4 * num); gdev->gd_notify_types = be32_to_cpup(p++); for (i = 1; i < num; i++) { From b752dfd91a3e9712aa68468b63725c927adcfaa4 Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Thu, 7 Jun 2018 17:06:50 -0700 Subject: [PATCH 145/519] mm: vmalloc: avoid racy handling of debugobjects in vunmap [ Upstream commit f3c01d2f3ade6790db67f80fef60df84424f8964 ] Currently, __vunmap flow is, 1) Release the VM area 2) Free the debug objects corresponding to that vm area. This leave some race window open. 1) Release the VM area 1.5) Some other client gets the same vm area 1.6) This client allocates new debug objects on the same vm area 2) Free the debug objects corresponding to this vm area. Here, we actually free 'other' client's debug objects. Fix this by freeing the debug objects first and then releasing the VM area. Link: http://lkml.kernel.org/r/1523961828-9485-2-git-send-email-cpandya@codeaurora.org Signed-off-by: Chintan Pandya Reviewed-by: Andrew Morton Cc: Ard Biesheuvel Cc: Byungchul Park Cc: Catalin Marinas Cc: Florian Fainelli Cc: Johannes Weiner Cc: Laura Abbott Cc: Vlastimil Babka Cc: Wei Yang Cc: Yisheng Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8e3c9c5a3042..de8e372ece04 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1460,7 +1460,7 @@ static void __vunmap(const void *addr, int deallocate_pages) addr)) return; - area = remove_vm_area(addr); + area = find_vmap_area((unsigned long)addr)->vm; if (unlikely(!area)) { WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr); @@ -1470,6 +1470,7 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(addr, get_vm_area_size(area)); debug_check_no_obj_freed(addr, get_vm_area_size(area)); + remove_vm_area(addr); if (deallocate_pages) { int i; From 3231613224c076ed1f0984e382cbd2df0d64c260 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 7 Jun 2018 17:05:17 -0700 Subject: [PATCH 146/519] mm/slub.c: add __printf verification to slab_err() [ Upstream commit a38965bf941b7c2af50de09c96bc5f03e136caef ] __printf is useful to verify format and arguments. Remove the following warning (with W=1): mm/slub.c:721:2: warning: function might be possible candidate for `gnu_printf' format attribute [-Wsuggest-attribute=format] Link: http://lkml.kernel.org/r/20180505200706.19986-1-malat@debian.org Signed-off-by: Mathieu Malaterre Reviewed-by: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 4cf3a9c768b1..2284c4333857 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -659,7 +659,7 @@ void object_err(struct kmem_cache *s, struct page *page, print_trailer(s, page, object); } -static void slab_err(struct kmem_cache *s, struct page *page, +static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...) { va_list args; From af1ecd4b5f740c30f7ed72bb8af0d7f74cc1b88d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 5 Jun 2018 23:09:14 +0200 Subject: [PATCH 147/519] rtc: ensure rtc_set_alarm fails when alarms are not supported [ Upstream commit abfdff44bc38e9e2ef7929f633fb8462632299d4 ] When using RTC_ALM_SET or RTC_WKALM_SET with rtc_wkalrm.enabled not set, rtc_timer_enqueue() is not called and rtc_set_alarm() may succeed but the subsequent RTC_AIE_ON ioctl will fail. RTC_ALM_READ would also fail in that case. Ensure rtc_set_alarm() fails when alarms are not supported to avoid letting programs think the alarms are working for a particular RTC when they are not. Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/interface.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index c2cf9485fe32..8c10f3db6336 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -349,6 +349,11 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; + if (!rtc->ops) + return -ENODEV; + else if (!rtc->ops->set_alarm) + return -EINVAL; + err = rtc_valid_tm(&alarm->time); if (err != 0) return err; From d77ff3576f734e5cc457679b8a5e21f413dbb68b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 31 May 2018 18:45:21 +0200 Subject: [PATCH 148/519] netfilter: ipset: List timing out entries with "timeout 1" instead of zero [ Upstream commit bd975e691486ba52790ba23cc9b4fecab7bc0d31 ] When listing sets with timeout support, there's a probability that just timing out entries with "0" timeout value is listed/saved. However when restoring the saved list, the zero timeout value means permanent elelements. The new behaviour is that timing out entries are listed with "timeout 1" instead of zero. Fixes netfilter bugzilla #1258. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 1d6a935c1ac5..8793f5a7b820 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -65,8 +65,14 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) static inline u32 ip_set_timeout_get(unsigned long *timeout) { - return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; + u32 t; + + if (*timeout == IPSET_ELEM_PERMANENT) + return 0; + + t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; + /* Zero value in userspace means no timeout */ + return t == 0 ? 1 : t; } #endif /* __KERNEL__ */ From 52175c849bd4c01dd14038b4401d5044d99a6b0a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 1 Jun 2018 11:31:44 -0700 Subject: [PATCH 149/519] infiniband: fix a possible use-after-free bug [ Upstream commit cb2595c1393b4a5211534e6f0a0fbad369e21ad8 ] ucma_process_join() will free the new allocated "mc" struct, if there is any error after that, especially the copy_to_user(). But in parallel, ucma_leave_multicast() could find this "mc" through idr_find() before ucma_process_join() frees it, since it is already published. So "mc" could be used in ucma_leave_multicast() after it is been allocated and freed in ucma_process_join(), since we don't refcnt it. Fix this by separating "publish" from ID allocation, so that we can get an ID first and publish it later after copy_to_user(). Fixes: c8f6a362bf3e ("RDMA/cma: Add multicast communication support") Reported-by: Noam Rathaus Signed-off-by: Cong Wang Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 795938edce3f..55aa8d3d752f 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -217,7 +217,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) return NULL; mutex_lock(&mut); - mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (mc->id < 0) goto error; @@ -1375,6 +1375,10 @@ static ssize_t ucma_process_join(struct ucma_file *file, goto err3; } + mutex_lock(&mut); + idr_replace(&multicast_idr, mc, mc->id); + mutex_unlock(&mut); + mutex_unlock(&file->mut); ucma_put_ctx(ctx); return 0; From c48557366d5244b7736b218633c5ef7a55c18f53 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Thu, 29 Mar 2018 17:02:46 +1100 Subject: [PATCH 150/519] hvc_opal: don't set tb_ticks_per_usec in udbg_init_opal_common() [ Upstream commit 447808bf500a7cc92173266a59f8a494e132b122 ] time_init() will set up tb_ticks_per_usec based on reality. time_init() is called *after* udbg_init_opal_common() during boot. from arch/powerpc/kernel/time.c: unsigned long tb_ticks_per_usec = 100; /* sane default */ Currently, all powernv systems have a timebase frequency of 512mhz (512000000/1000000 == 0x200) - although there's nothing written down anywhere that I can find saying that we couldn't make that different based on the requirements in the ISA. So, we've been (accidentally) thwacking the (currently) correct (for powernv at least) value for tb_ticks_per_usec earlier than we otherwise would have. The "sane default" seems to be adequate for our purposes between udbg_init_opal_common() and time_init() being called, and if it isn't, then we should probably be setting it somewhere that isn't hvc_opal.c! Signed-off-by: Stewart Smith Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_opal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 47b54c6aefd2..9f660e55d1ba 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -323,7 +323,6 @@ static void udbg_init_opal_common(void) udbg_putc = udbg_opal_putc; udbg_getc = udbg_opal_getc; udbg_getc_poll = udbg_opal_getc_poll; - tb_ticks_per_usec = 0x200; /* Make udelay not suck */ } void __init hvc_opal_init_early(void) From b5145f8b3506ad25b4c31970057fabca1543b325 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 30 May 2018 20:31:22 +1000 Subject: [PATCH 151/519] powerpc/64s: Fix compiler store ordering to SLB shadow area [ Upstream commit 926bc2f100c24d4842b3064b5af44ae964c1d81c ] The stores to update the SLB shadow area must be made as they appear in the C code, so that the hypervisor does not see an entry with mismatched vsid and esid. Use WRITE_ONCE for this. GCC has been observed to elide the first store to esid in the update, which means that if the hypervisor interrupts the guest after storing to vsid, it could see an entry with old esid and new vsid, which may possibly result in memory corruption. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/slb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 515730e499fe..309027208f7c 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -69,14 +69,14 @@ static inline void slb_shadow_update(unsigned long ea, int ssize, * updating it. No write barriers are needed here, provided * we only update the current CPU's SLB shadow buffer. */ - p->save_area[index].esid = 0; - p->save_area[index].vsid = cpu_to_be64(mk_vsid_data(ea, ssize, flags)); - p->save_area[index].esid = cpu_to_be64(mk_esid_data(ea, ssize, index)); + WRITE_ONCE(p->save_area[index].esid, 0); + WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags))); + WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index))); } static inline void slb_shadow_clear(enum slb_index index) { - get_slb_shadow()->save_area[index].esid = 0; + WRITE_ONCE(get_slb_shadow()->save_area[index].esid, 0); } static inline void create_shadowed_slbe(unsigned long ea, int ssize, From ded30c95e8588ccdd9f281f36c0d37d7d030ef9d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 May 2018 14:56:19 +0300 Subject: [PATCH 152/519] RDMA/mad: Convert BUG_ONs to error flows [ Upstream commit 2468b82d69e3a53d024f28d79ba0fdb8bf43dfbf ] Let's perform checks in-place instead of BUG_ONs. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/mad.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 8d84c563ba75..616173b7a5e8 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1548,7 +1548,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, mad_reg_req->oui, 3)) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(!*method); + if (!*method) + goto error3; goto check_in_use; } } @@ -1558,10 +1559,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, vclass]->oui[i])) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(*method); /* Allocate method table for this OUI */ - if ((ret = allocate_method_table(method))) - goto error3; + if (!*method) { + ret = allocate_method_table(method); + if (ret) + goto error3; + } memcpy((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3); goto check_in_use; From 7a5468e02be689557fe4fead5d4897adc5c6932f Mon Sep 17 00:00:00 2001 From: Anatoly Pugachev Date: Mon, 28 May 2018 02:06:37 +0300 Subject: [PATCH 153/519] disable loading f2fs module on PAGE_SIZE > 4KB [ Upstream commit 4071e67cffcc5c2a007116a02437471351f550eb ] The following patch disables loading of f2fs module on architectures which have PAGE_SIZE > 4096 , since it is impossible to mount f2fs on such architectures , log messages are: mount: /mnt: wrong fs type, bad option, bad superblock on /dev/vdiskb1, missing codepage or helper program, or other error. /dev/vdiskb1: F2FS filesystem, UUID=1d8b9ca4-2389-4910-af3b-10998969f09c, volume name "" May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid page_cache_size (8192), supports only 4KB May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS filesystem in 1th superblock May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid page_cache_size (8192), supports only 4KB May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS filesystem in 2th superblock May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid page_cache_size (8192), supports only 4KB which was introduced by git commit 5c9b469295fb6b10d98923eab5e79c4edb80ed20 tested on git kernel 4.17.0-rc6-00309-gec30dcf7f425 with patch applied: modprobe: ERROR: could not insert 'f2fs': Invalid argument May 28 01:40:28 v215 kernel: F2FS not supported on PAGE_SIZE(8192) != 4096 Signed-off-by: Anatoly Pugachev Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4f666368aa85..6cc67e1bbb41 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1566,6 +1566,12 @@ static int __init init_f2fs_fs(void) { int err; + if (PAGE_SIZE != F2FS_BLKSIZE) { + printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n", + PAGE_SIZE, F2FS_BLKSIZE); + return -EINVAL; + } + f2fs_build_trace_ios(); err = init_inodecache(); From 52941707f789de36db5a12cd4e585a77140d546f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 May 2018 18:03:34 +0800 Subject: [PATCH 154/519] f2fs: fix to don't trigger writeback during recovery [ Upstream commit 64c74a7ab505ea40d1b3e5d02735ecab08ae1b14 ] - f2fs_fill_super - recover_fsync_data - recover_data - del_fsync_inode - iput - iput_final - write_inode_now - f2fs_write_inode - f2fs_balance_fs - f2fs_balance_fs_bg - sync_dirty_inodes With data_flush mount option, during recovery, in order to avoid entering above writeback flow, let's detect recovery status and do skip in f2fs_balance_fs_bg. Signed-off-by: Chao Yu Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f77b3258454a..2bba0c4ef4b7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -295,6 +295,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return; + /* try to shrink extent cache when there is no enough memory */ if (!available_free_memory(sbi, EXTENT_CACHE)) f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); From 3b14ad7ca75ec8600a73538cc6240be008683585 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 29 May 2018 16:13:03 -0600 Subject: [PATCH 155/519] usbip: usbip_detach: Fix memory, udev context and udev leak [ Upstream commit d179f99a651685b19333360e6558110da2fe9bd7 ] detach_port() fails to call usbip_vhci_driver_close() from its error path after usbip_vhci_detach_device() returns failure, leaking memory allocated in usbip_vhci_driver_open() and holding udev_context and udev references. Fix it to call usbip_vhci_driver_close(). Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_detach.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c index 9db9d21bb2ec..6a8db858caa5 100644 --- a/tools/usb/usbip/src/usbip_detach.c +++ b/tools/usb/usbip/src/usbip_detach.c @@ -43,7 +43,7 @@ void usbip_detach_usage(void) static int detach_port(char *port) { - int ret; + int ret = 0; uint8_t portnum; char path[PATH_MAX+1]; @@ -73,9 +73,12 @@ static int detach_port(char *port) } ret = usbip_vhci_detach_device(portnum); - if (ret < 0) - return -1; + if (ret < 0) { + ret = -1; + goto call_driver_close; + } +call_driver_close: usbip_vhci_driver_close(); return ret; From b4d9e5e88b8f0d71b00928bf8c5d952ce6fbdcc9 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 3 May 2018 11:25:08 -0700 Subject: [PATCH 156/519] perf/x86/intel/uncore: Correct fixed counter index check in generic code [ Upstream commit 4749f8196452eeb73cf2086a6a9705bae479d33d ] There is no index which is bigger than UNCORE_PMC_IDX_FIXED. The only exception is client IMC uncore, which has been specially handled. For generic code, it is not correct to use >= to check fixed counter. The code quality issue will bring problem when a new counter index is introduced. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: acme@kernel.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1525371913-10597-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 61215a69b03d..b22e9c4dd111 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -229,7 +229,7 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e u64 prev_count, new_count, delta; int shift; - if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) + if (event->hw.idx == UNCORE_PMC_IDX_FIXED) shift = 64 - uncore_fixed_ctr_bits(box); else shift = 64 - uncore_perf_ctr_bits(box); From d6a260fe056bef936912f7f9f119a996a1bebe70 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 3 May 2018 11:25:07 -0700 Subject: [PATCH 157/519] perf/x86/intel/uncore: Correct fixed counter index check for NHM [ Upstream commit d71f11c076c420c4e2fceb4faefa144e055e0935 ] For Nehalem and Westmere, there is only one fixed counter for W-Box. There is no index which is bigger than UNCORE_PMC_IDX_FIXED. It is not correct to use >= to check fixed counter. The code quality issue will bring problem when new counter index is introduced. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: acme@kernel.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1525371913-10597-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c index 2749965afed0..83cadc2605a7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c @@ -240,7 +240,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p { struct hw_perf_event *hwc = &event->hw; - if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + if (hwc->idx == UNCORE_PMC_IDX_FIXED) wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); From 8477cd5e00234d26c58ffe5cf99a7dde57841315 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 22 Mar 2018 14:14:45 +0200 Subject: [PATCH 158/519] iwlwifi: pcie: fix race in Rx buffer allocator [ Upstream commit 0f22e40053bd5378ad1e3250e65c574fd61c0cd6 ] Make sure the rx_allocator worker is canceled before running the rx_init routine. rx_init frees and re-allocates all rxb's pages. The rx_allocator worker also allocates pages for the used rxb's. Running rx_init and rx_allocator simultaniously causes a kernel panic. Fix that by canceling the work in rx_init. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c index e06591f625c4..d6f9858ff2de 100644 --- a/drivers/net/wireless/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/iwlwifi/pcie/rx.c @@ -713,6 +713,8 @@ int iwl_pcie_rx_init(struct iwl_trans *trans) WQ_HIGHPRI | WQ_UNBOUND, 1); INIT_WORK(&rba->rx_alloc, iwl_pcie_rx_allocator_work); + cancel_work_sync(&rba->rx_alloc); + spin_lock(&rba->lock); atomic_set(&rba->req_pending, 0); atomic_set(&rba->req_ready, 0); From 6d143e2c4545d7e2a13b22a5fcfcc98b38b9457d Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 29 May 2018 18:37:16 +0200 Subject: [PATCH 159/519] Bluetooth: hci_qca: Fix "Sleep inside atomic section" warning [ Upstream commit 9960521c44a5d828f29636ceac0600603ecbddbf ] This patch fixes the following warning during boot: do not call blocking ops when !TASK_RUNNING; state=1 set at [<(ptrval)>] qca_setup+0x194/0x750 [hci_uart] WARNING: CPU: 2 PID: 1878 at kernel/sched/core.c:6135 __might_sleep+0x7c/0x88 In qca_set_baudrate(), the current task state is set to TASK_UNINTERRUPTIBLE before going to sleep for 300ms. It was then restored to TASK_INTERRUPTIBLE. This patch sets the current task state back to TASK_RUNNING instead. Signed-off-by: Thierry Escande Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 476d39c7ba20..ecfb9ed2cff6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -884,7 +884,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) */ set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(BAUDRATE_SETTLE_TIMEOUT_MS)); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_RUNNING); return 0; } From 276a8ad16b34837b3ad586beb7339057e48510a7 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 21 May 2018 18:09:20 +0800 Subject: [PATCH 160/519] Bluetooth: btusb: Add a new Realtek 8723DE ID 2ff8:b011 [ Upstream commit 66d9975c5a7c40aa7e4bb0ec0b0c37ba1f190923 ] Without this patch we cannot turn on the Bluethooth adapter on ASUS E406MA. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2ff8 ProdID=b011 Rev= 2.00 S: Manufacturer=Realtek S: Product=802.11n WLAN Adapter S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Jian-Hong Pan Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 91676535a1a3..4a899b41145e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -339,6 +339,9 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8723BU Bluetooth devices */ { USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8723DE Bluetooth devices */ + { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8821AE Bluetooth devices */ { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK }, From 8fe7c570ce064514b0b928089966e551bafc443b Mon Sep 17 00:00:00 2001 From: Kai Chieh Chuang Date: Mon, 28 May 2018 10:18:18 +0800 Subject: [PATCH 161/519] ASoC: dpcm: fix BE dai not hw_free and shutdown [ Upstream commit 9c0ac70ad24d76b873c1551e27790c7f6a815d5c ] In case, one BE is used by two FE1/FE2 FE1--->BE--> | FE2----] when FE1/FE2 call dpcm_be_dai_hw_free() together the BE users will be 2 (> 1), hence cannot be hw_free the be state will leave at, ex. SND_SOC_DPCM_STATE_STOP later FE1/FE2 call dpcm_be_dai_shutdown(), will be skip due to wrong state. leaving the BE not being hw_free and shutdown. The BE dai will be hw_free later when calling dpcm_be_dai_shutdown() if still in invalid state. Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 977066ba1769..43b80db952d1 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1682,8 +1682,10 @@ int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream) continue; if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) - continue; + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) { + soc_pcm_hw_free(be_substream); + be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; + } dev_dbg(be->dev, "ASoC: close BE %s\n", dpcm->fe->dai_link->name); From d47b6f739d45661e046e06fb40f843c10f452463 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 18 Apr 2018 12:23:58 +0200 Subject: [PATCH 162/519] mfd: cros_ec: Fail early if we cannot identify the EC [ Upstream commit 0dbbf25561b29ffab5ba6277429760abdf49ceff ] If we cannot communicate with the EC chip to detect the protocol version and its features, it's very likely useless to continue. Else we will commit all kind of uninformed mistakes (using the wrong protocol, the wrong buffer size, mixing the EC with other chips). Signed-off-by: Vincent Palatin Acked-by: Benson Leung Signed-off-by: Enric Balletbo i Serra Reviewed-by: Gwendal Grignou Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/cros_ec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 0eee63542038..115a6f67ab51 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -68,7 +68,11 @@ int cros_ec_register(struct cros_ec_device *ec_dev) mutex_init(&ec_dev->lock); - cros_ec_query_all(ec_dev); + err = cros_ec_query_all(ec_dev); + if (err) { + dev_err(dev, "Cannot identify the EC: error %d\n", err); + return err; + } err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, 1, NULL, ec_dev->irq, NULL); From efd8946b75739deb614b5c889452bed98ba32fa0 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Thu, 24 May 2018 19:18:27 +0530 Subject: [PATCH 163/519] mwifiex: handle race during mwifiex_usb_disconnect [ Upstream commit b817047ae70c0bd67b677b65d0d69d72cd6e9728 ] Race condition is observed during rmmod of mwifiex_usb: 1. The rmmod thread will call mwifiex_usb_disconnect(), download SHUTDOWN command and do wait_event_interruptible_timeout(), waiting for response. 2. The main thread will handle the response and will do a wake_up_interruptible(), unblocking rmmod thread. 3. On getting unblocked, rmmod thread will make rx_cmd.urb = NULL in mwifiex_usb_free(). 4. The main thread will try to resubmit rx_cmd.urb in mwifiex_usb_submit_rx_urb(), which is NULL. To fix, wait for main thread to complete before calling mwifiex_usb_free(). Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index e43aff932360..1a1b1de87583 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -624,6 +624,9 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) MWIFIEX_FUNC_SHUTDOWN); } + if (adapter->workqueue) + flush_workqueue(adapter->workqueue); + mwifiex_usb_free(card); mwifiex_dbg(adapter, FATAL, From b5326119895704e7912ffad25bb3360aa75bb780 Mon Sep 17 00:00:00 2001 From: Eyal Reizer Date: Mon, 28 May 2018 11:36:42 +0300 Subject: [PATCH 164/519] wlcore: sdio: check for valid platform device data before suspend [ Upstream commit 6e91d48371e79862ea2c05867aaebe4afe55a865 ] the wl pointer can be null In case only wlcore_sdio is probed while no WiLink module is successfully probed, as in the case of mounting a wl12xx module while using a device tree file configured with wl18xx related settings. In this case the system was crashing in wl1271_suspend() as platform device data is not set. Make sure wl the pointer is valid before using it. Signed-off-by: Eyal Reizer Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wlcore/sdio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index c172da56b550..e4a8280cea83 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -388,6 +388,11 @@ static int wl1271_suspend(struct device *dev) mmc_pm_flag_t sdio_flags; int ret = 0; + if (!wl) { + dev_err(dev, "no wilink module was probed\n"); + goto out; + } + dev_dbg(dev, "wl1271 suspend. wow_enabled: %d\n", wl->wow_enabled); From 46431d9c28f6859f8e568ac7db92137f1da31100 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 21 May 2018 08:43:02 -0400 Subject: [PATCH 165/519] media: videobuf2-core: don't call memop 'finish' when queueing [ Upstream commit 90b2da89a083e1395cb322521a42397c49ae4500 ] When a buffer is queued or requeued in vb2_buffer_done, then don't call the finish memop. In this case the buffer is only returned to vb2, not to userspace. Calling 'finish' here will cause an unbalance when the queue is canceled, since the core will call the same memop again. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index bb1e19f7ed5a..0c1a42bf27fd 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -870,9 +870,12 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state) dprintk(4, "done processing on buffer %d, state: %d\n", vb->index, state); - /* sync buffers */ - for (plane = 0; plane < vb->num_planes; ++plane) - call_void_memop(vb, finish, vb->planes[plane].mem_priv); + if (state != VB2_BUF_STATE_QUEUED && + state != VB2_BUF_STATE_REQUEUEING) { + /* sync buffers */ + for (plane = 0; plane < vb->num_planes; ++plane) + call_void_memop(vb, finish, vb->planes[plane].mem_priv); + } spin_lock_irqsave(&q->done_lock, flags); if (state == VB2_BUF_STATE_QUEUED || From 082c9832168598ee825894f126c66476ee8be8ac Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 24 Apr 2018 14:53:56 +0200 Subject: [PATCH 166/519] btrfs: add barriers to btrfs_sync_log before log_commit_wait wakeups [ Upstream commit 3d3a2e610ea5e7c6d4f9481ecce5d8e2d8317843 ] Currently the code assumes that there's an implied barrier by the sequence of code preceding the wakeup, namely the mutex unlock. As Nikolay pointed out: I think this is wrong (not your code) but the original assumption that the RELEASE semantics provided by mutex_unlock is sufficient. According to memory-barriers.txt: Section 'LOCK ACQUISITION FUNCTIONS' states: (2) RELEASE operation implication: Memory operations issued before the RELEASE will be completed before the RELEASE operation has completed. Memory operations issued after the RELEASE *may* be completed before the RELEASE operation has completed. (I've bolded the may portion) The example given there: As an example, consider the following: *A = a; *B = b; ACQUIRE *C = c; *D = d; RELEASE *E = e; *F = f; The following sequence of events is acceptable: ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE So if we assume that *C is modifying the flag which the waitqueue is checking, and *E is the actual wakeup, then those accesses can be re-ordered... IMHO this code should be considered broken... Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 738f5d6beb95..2c7f9a5f8717 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2961,8 +2961,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); /* - * The barrier before waitqueue_active is implied by mutex_unlock + * The barrier before waitqueue_active is needed so all the updates + * above are seen by the woken threads. It might not be necessary, but + * proving that seems to be hard. */ + smp_mb(); if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) wake_up(&log_root_tree->log_commit_wait[index2]); out: @@ -2973,8 +2976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&root->log_mutex); /* - * The barrier before waitqueue_active is implied by mutex_unlock + * The barrier before waitqueue_active is needed so all the updates + * above are seen by the woken threads. It might not be necessary, but + * proving that seems to be hard. */ + smp_mb(); if (waitqueue_active(&root->log_commit_wait[index1])) wake_up(&root->log_commit_wait[index1]); return ret; From 40e082b99a1eb935dab65a869593617407483016 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 14 May 2018 09:38:13 +0800 Subject: [PATCH 167/519] btrfs: qgroup: Finish rescan when hit the last leaf of extent tree [ Upstream commit ff3d27a048d926b3920ccdb75d98788c567cae0d ] Under the following case, qgroup rescan can double account cowed tree blocks: In this case, extent tree only has one tree block. - | transid=5 last committed=4 | btrfs_qgroup_rescan_worker() | |- btrfs_start_transaction() | | transid = 5 | |- qgroup_rescan_leaf() | |- btrfs_search_slot_for_read() on extent tree | Get the only extent tree block from commit root (transid = 4). | Scan it, set qgroup_rescan_progress to the last | EXTENT/META_ITEM + 1 | now qgroup_rescan_progress = A + 1. | | fs tree get CoWed, new tree block is at A + 16K | transid 5 get committed - | transid=6 last committed=5 | btrfs_qgroup_rescan_worker() | btrfs_qgroup_rescan_worker() | |- btrfs_start_transaction() | | transid = 5 | |- qgroup_rescan_leaf() | |- btrfs_search_slot_for_read() on extent tree | Get the only extent tree block from commit root (transid = 5). | scan it using qgroup_rescan_progress (A + 1). | found new tree block beyong A, and it's fs tree block, | account it to increase qgroup numbers. - In above case, tree block A, and tree block A + 16K get accounted twice, while qgroup rescan should stop when it already reach the last leaf, other than continue using its qgroup_rescan_progress. Such case could happen by just looping btrfs/017 and with some possibility it can hit such double qgroup accounting problem. Fix it by checking the path to determine if we should finish qgroup rescan, other than relying on next loop to exit. Reported-by: Nikolay Borisov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 88d9b66e2207..a751937dded5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2185,6 +2185,21 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) BUG(); } +/* + * Check if the leaf is the last leaf. Which means all node pointers + * are at their last position. + */ +static bool is_last_leaf(struct btrfs_path *path) +{ + int i; + + for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { + if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1) + return false; + } + return true; +} + /* * returns < 0 on error, 0 when more leafs are to be scanned. * returns 1 when done. @@ -2198,6 +2213,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct ulist *roots = NULL; struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); u64 num_bytes; + bool done; int slot; int ret; @@ -2225,6 +2241,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, mutex_unlock(&fs_info->qgroup_rescan_lock); return ret; } + done = is_last_leaf(path); btrfs_item_key_to_cpu(path->nodes[0], &found, btrfs_header_nritems(path->nodes[0]) - 1); @@ -2271,6 +2288,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, } btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); + if (done && !ret) + ret = 1; return ret; } From 549f4ee6884758a683ab1e0245987a8a7a38563a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 May 2018 18:56:24 +0200 Subject: [PATCH 168/519] PCI: Prevent sysfs disable of device while driver is attached [ Upstream commit 6f5cdfa802733dcb561bf664cc89d203f2fd958f ] Manipulating the enable_cnt behind the back of the driver will wreak complete havoc with the kernel state, so disallow it. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Johannes Thumshirn Acked-by: Keith Busch Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index ec91cd17bf34..5fb4ed6ea322 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -180,13 +180,16 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!val) { - if (pci_is_enabled(pdev)) - pci_disable_device(pdev); - else - result = -EIO; - } else + device_lock(dev); + if (dev->driver) + result = -EBUSY; + else if (val) result = pci_enable_device(pdev); + else if (pci_is_enabled(pdev)) + pci_disable_device(pdev); + else + result = -EIO; + device_unlock(dev); return result < 0 ? result : count; } From da1602d2d95f857cfbc8b4b59c3da2a9988fc38d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:30 +0300 Subject: [PATCH 169/519] ath: Add regulatory mapping for FCC3_ETSIC [ Upstream commit 01fb2994a98dc72c8818c274f7b5983d5dd885c7 ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index bdd2b4d61f2f..7d955fa8c24c 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -35,6 +35,7 @@ enum EnumRd { FRANCE_RES = 0x31, FCC3_FCCA = 0x3A, FCC3_WORLD = 0x3B, + FCC3_ETSIC = 0x3F, ETSI1_WORLD = 0x37, ETSI3_ETSIA = 0x32, @@ -168,6 +169,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {FCC2_ETSIC, CTL_FCC, CTL_ETSI}, {FCC3_FCCA, CTL_FCC, CTL_FCC}, {FCC3_WORLD, CTL_FCC, CTL_ETSI}, + {FCC3_ETSIC, CTL_FCC, CTL_ETSI}, {FCC4_FCCA, CTL_FCC, CTL_FCC}, {FCC5_FCCA, CTL_FCC, CTL_FCC}, {FCC6_FCCA, CTL_FCC, CTL_FCC}, From a64815422466d95561c7dd68fec26b693e5893f1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:18 +0300 Subject: [PATCH 170/519] ath: Add regulatory mapping for ETSI8_WORLD [ Upstream commit 45faf6e096da8bb80e1ddf8c08a26a9601d9469e ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: ETSI * 5GHz: ETSI Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 7d955fa8c24c..7c0fcbbf1900 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -45,6 +45,7 @@ enum EnumRd { ETSI4_ETSIC = 0x38, ETSI5_WORLD = 0x39, ETSI6_WORLD = 0x34, + ETSI8_WORLD = 0x3D, ETSI_RESERVED = 0x33, MKK1_MKKA = 0x40, @@ -181,6 +182,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {ETSI4_WORLD, CTL_ETSI, CTL_ETSI}, {ETSI5_WORLD, CTL_ETSI, CTL_ETSI}, {ETSI6_WORLD, CTL_ETSI, CTL_ETSI}, + {ETSI8_WORLD, CTL_ETSI, CTL_ETSI}, /* XXX: For ETSI3_ETSIA, Was NO_CTL meant for the 2 GHz band ? */ {ETSI3_ETSIA, CTL_ETSI, CTL_ETSI}, From beb2347e3f7a9757cb43a85ae3b63f154b88e060 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:14 +0300 Subject: [PATCH 171/519] ath: Add regulatory mapping for APL13_WORLD [ Upstream commit 9ba8df0c52b3e6baa436374b429d3d73bd09a320 ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: ETSI * 5GHz: ETSI Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 7c0fcbbf1900..2c873840a46a 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -69,6 +69,7 @@ enum EnumRd { APL1_ETSIC = 0x55, APL2_ETSIC = 0x56, APL5_WORLD = 0x58, + APL13_WORLD = 0x5A, APL6_WORLD = 0x5B, APL7_FCCA = 0x5C, APL8_WORLD = 0x5D, @@ -195,6 +196,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {APL3_WORLD, CTL_FCC, CTL_ETSI}, {APL4_WORLD, CTL_FCC, CTL_ETSI}, {APL5_WORLD, CTL_FCC, CTL_ETSI}, + {APL13_WORLD, CTL_ETSI, CTL_ETSI}, {APL6_WORLD, CTL_ETSI, CTL_ETSI}, {APL8_WORLD, CTL_ETSI, CTL_ETSI}, {APL9_WORLD, CTL_ETSI, CTL_ETSI}, From ca1d2c2d4087dfe59d90b314c465d6038799fc28 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:11:05 +0300 Subject: [PATCH 172/519] ath: Add regulatory mapping for APL2_FCCA [ Upstream commit 4f183687e3fad3ce0e06e38976cad81bc4541990 ] The regdomain code is used to select the correct the correct conformance test limits (CTL) for a country. If the regdomain code isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this regdomain code are: * 2.4GHz: FCC * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 2c873840a46a..d8a7db4976f0 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -61,6 +61,7 @@ enum EnumRd { MKK1_MKKA1 = 0x4A, MKK1_MKKA2 = 0x4B, MKK1_MKKC = 0x4C, + APL2_FCCA = 0x4D, APL3_FCCA = 0x50, APL1_WORLD = 0x52, @@ -193,6 +194,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = { {FCC1_FCCA, CTL_FCC, CTL_FCC}, {APL1_WORLD, CTL_FCC, CTL_ETSI}, {APL2_WORLD, CTL_FCC, CTL_ETSI}, + {APL2_FCCA, CTL_FCC, CTL_FCC}, {APL3_WORLD, CTL_FCC, CTL_ETSI}, {APL4_WORLD, CTL_FCC, CTL_ETSI}, {APL5_WORLD, CTL_FCC, CTL_ETSI}, From b90059eadf8c4bdcf58ee4ac309eb9f412643a26 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:10:54 +0300 Subject: [PATCH 173/519] ath: Add regulatory mapping for Uganda [ Upstream commit 1ea3986ad2bc72081c69f3fbc1e5e0eeb3c44f17 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index 37f53bd8fcb1..18a4d0557f18 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -175,6 +175,7 @@ enum CountryCode { CTRY_TUNISIA = 788, CTRY_TURKEY = 792, CTRY_UAE = 784, + CTRY_UGANDA = 800, CTRY_UKRAINE = 804, CTRY_UNITED_KINGDOM = 826, CTRY_UNITED_STATES = 840, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index d8a7db4976f0..cba1020bc854 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -467,6 +467,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"}, {CTRY_TUNISIA, ETSI3_WORLD, "TN"}, {CTRY_TURKEY, ETSI3_WORLD, "TR"}, + {CTRY_UGANDA, FCC3_WORLD, "UG"}, {CTRY_UKRAINE, NULL1_WORLD, "UA"}, {CTRY_UAE, NULL1_WORLD, "AE"}, {CTRY_UNITED_KINGDOM, ETSI1_WORLD, "GB"}, From 0ac4e043466f5f3916e0bd3dacbf440ebacc9f61 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:10:48 +0300 Subject: [PATCH 174/519] ath: Add regulatory mapping for Tanzania [ Upstream commit 667ddac5745fb9fddfe8f7fd2523070f50bd4442 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index 18a4d0557f18..7439e62f5262 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -170,6 +170,7 @@ enum CountryCode { CTRY_SWITZERLAND = 756, CTRY_SYRIA = 760, CTRY_TAIWAN = 158, + CTRY_TANZANIA = 834, CTRY_THAILAND = 764, CTRY_TRINIDAD_Y_TOBAGO = 780, CTRY_TUNISIA = 788, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index cba1020bc854..b85dc86cc188 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -463,6 +463,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_SWITZERLAND, ETSI1_WORLD, "CH"}, {CTRY_SYRIA, NULL1_WORLD, "SY"}, {CTRY_TAIWAN, APL3_FCCA, "TW"}, + {CTRY_TANZANIA, APL1_WORLD, "TZ"}, {CTRY_THAILAND, FCC3_WORLD, "TH"}, {CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"}, {CTRY_TUNISIA, ETSI3_WORLD, "TN"}, From 49ad264237d82059109e982e3d35966edd51fef9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:10:43 +0300 Subject: [PATCH 175/519] ath: Add regulatory mapping for Serbia [ Upstream commit 2a3169a54bb53717928392a04fb84deb765b51f1 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: ETSI Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index 7439e62f5262..a0ed5f8b554e 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -159,6 +159,7 @@ enum CountryCode { CTRY_ROMANIA = 642, CTRY_RUSSIA = 643, CTRY_SAUDI_ARABIA = 682, + CTRY_SERBIA = 688, CTRY_SERBIA_MONTENEGRO = 891, CTRY_SINGAPORE = 702, CTRY_SLOVAKIA = 703, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index b85dc86cc188..1ced5a323cf8 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -452,6 +452,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_ROMANIA, NULL1_WORLD, "RO"}, {CTRY_RUSSIA, NULL1_WORLD, "RU"}, {CTRY_SAUDI_ARABIA, NULL1_WORLD, "SA"}, + {CTRY_SERBIA, ETSI1_WORLD, "RS"}, {CTRY_SERBIA_MONTENEGRO, ETSI1_WORLD, "CS"}, {CTRY_SINGAPORE, APL6_WORLD, "SG"}, {CTRY_SLOVAKIA, ETSI1_WORLD, "SK"}, From d35dcd927fa5643183c6a5ca9c086688857b6763 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:09:59 +0300 Subject: [PATCH 176/519] ath: Add regulatory mapping for Bermuda [ Upstream commit 9c790f2d234f65697e3b0948adbfdf36dbe63dd7 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: FCC * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index a0ed5f8b554e..a9334d514154 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -74,6 +74,7 @@ enum CountryCode { CTRY_BELARUS = 112, CTRY_BELGIUM = 56, CTRY_BELIZE = 84, + CTRY_BERMUDA = 60, CTRY_BOLIVIA = 68, CTRY_BOSNIA_HERZ = 70, CTRY_BRAZIL = 76, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index 1ced5a323cf8..e13b96e45d53 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -313,6 +313,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_BELGIUM, ETSI1_WORLD, "BE"}, {CTRY_BELGIUM2, ETSI4_WORLD, "BL"}, {CTRY_BELIZE, APL1_ETSIC, "BZ"}, + {CTRY_BERMUDA, FCC3_FCCA, "BM"}, {CTRY_BOLIVIA, APL1_ETSIC, "BO"}, {CTRY_BOSNIA_HERZ, ETSI1_WORLD, "BA"}, {CTRY_BRAZIL, FCC3_WORLD, "BR"}, From 8b299e29871ebafa9b5d1b8b30394f7c853445d4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 May 2018 11:09:53 +0300 Subject: [PATCH 177/519] ath: Add regulatory mapping for Bahamas [ Upstream commit 699e2302c286a14afe7b7394151ce6c4e1790cc1 ] The country code is used by the ath to detect the ISO 3166-1 alpha-2 name and to select the correct conformance test limits (CTL) for a country. If the country isn't available and it is still programmed in the EEPROM then it will cause an error and stop the initialization with: Invalid EEPROM contents The current CTL mappings for this country are: * 2.4GHz: ETSI * 5GHz: FCC Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/regd.h | 1 + drivers/net/wireless/ath/regd_common.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index a9334d514154..184b6810cde9 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -68,6 +68,7 @@ enum CountryCode { CTRY_AUSTRALIA = 36, CTRY_AUSTRIA = 40, CTRY_AZERBAIJAN = 31, + CTRY_BAHAMAS = 44, CTRY_BAHRAIN = 48, CTRY_BANGLADESH = 50, CTRY_BARBADOS = 52, diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h index e13b96e45d53..15bbd1e0d912 100644 --- a/drivers/net/wireless/ath/regd_common.h +++ b/drivers/net/wireless/ath/regd_common.h @@ -306,6 +306,7 @@ static struct country_code_to_enum_rd allCountries[] = { {CTRY_AUSTRALIA2, FCC6_WORLD, "AU"}, {CTRY_AUSTRIA, ETSI1_WORLD, "AT"}, {CTRY_AZERBAIJAN, ETSI4_WORLD, "AZ"}, + {CTRY_BAHAMAS, FCC3_WORLD, "BS"}, {CTRY_BAHRAIN, APL6_WORLD, "BH"}, {CTRY_BANGLADESH, NULL1_WORLD, "BD"}, {CTRY_BARBADOS, FCC2_WORLD, "BB"}, From 753c9c38e28728cdfd8a3e4031f04823c660e960 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:20:03 +0100 Subject: [PATCH 178/519] powerpc/32: Add a missing include header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c89ca593220931c150cffda24b4d4ccf82f13fc8 ] The header file was missing from the includes. Fix the following warning, treated as error with W=1: arch/powerpc/kernel/pci_32.c:286:6: error: no previous prototype for ‘sys_pciconfig_iobase’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/pci_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1f7930037cb7..d9e41b77dd13 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include From c2d11a8dc4ddae672ed5d6b23179ef265ac24692 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:19:56 +0100 Subject: [PATCH 179/519] powerpc/chrp/time: Make some functions static, add missing header include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b87a358b4a1421abd544c0b554b1b7159b2b36c0 ] Add a missing include . These functions can all be static, make it so. Fix warnings treated as errors with W=1: arch/powerpc/platforms/chrp/time.c:41:13: error: no previous prototype for ‘chrp_time_init’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:66:5: error: no previous prototype for ‘chrp_cmos_clock_read’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:74:6: error: no previous prototype for ‘chrp_cmos_clock_write’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:86:5: error: no previous prototype for ‘chrp_set_rtc_time’ [-Werror=missing-prototypes] arch/powerpc/platforms/chrp/time.c:130:6: error: no previous prototype for ‘chrp_get_rtc_time’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/chrp/time.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c index f803f4b8ab6f..8608e358217f 100644 --- a/arch/powerpc/platforms/chrp/time.c +++ b/arch/powerpc/platforms/chrp/time.c @@ -27,6 +27,8 @@ #include #include +#include + extern spinlock_t rtc_lock; #define NVRAM_AS0 0x74 @@ -62,7 +64,7 @@ long __init chrp_time_init(void) return 0; } -int chrp_cmos_clock_read(int addr) +static int chrp_cmos_clock_read(int addr) { if (nvram_as1 != 0) outb(addr>>8, nvram_as1); @@ -70,7 +72,7 @@ int chrp_cmos_clock_read(int addr) return (inb(nvram_data)); } -void chrp_cmos_clock_write(unsigned long val, int addr) +static void chrp_cmos_clock_write(unsigned long val, int addr) { if (nvram_as1 != 0) outb(addr>>8, nvram_as1); From dd76f0988378d6e09879435b0b27d3426e767419 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 4 Apr 2018 22:13:05 +0200 Subject: [PATCH 180/519] powerpc/powermac: Add missing prototype for note_bootable_part() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f72cf3f1d49f2c35d6cb682af2e8c93550f264e4 ] Add a missing prototype for function `note_bootable_part` to silence a warning treated as error with W=1: arch/powerpc/platforms/powermac/setup.c:361:12: error: no previous prototype for ‘note_bootable_part’ [-Werror=missing-prototypes] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powermac/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 8dd78f4e1af4..32fc56cf6261 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -359,6 +359,7 @@ static int pmac_late_init(void) } machine_late_initcall(powermac, pmac_late_init); +void note_bootable_part(dev_t dev, int part, int goodness); /* * This is __init_refok because we check for "initializing" before * touching any of the __init sensitive things and "initializing" From d3456beb1aab696ed7cae61183d0fd148d9d4f91 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 4 Apr 2018 22:07:46 +0200 Subject: [PATCH 181/519] powerpc/powermac: Mark variable x as unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5a4b475cf8511da721f20ba432c244061db7139f ] Since the value of x is never intended to be read, declare it with gcc attribute as unused. Fix warning treated as error with W=1: arch/powerpc/platforms/powermac/bootx_init.c:471:21: error: variable ‘x’ set but not used [-Werror=unused-but-set-variable] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powermac/bootx_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 76f5013c35e5..89237b84b096 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -467,7 +467,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4) boot_infos_t *bi = (boot_infos_t *) r4; unsigned long hdr; unsigned long space; - unsigned long ptr, x; + unsigned long ptr; char *model; unsigned long offset = reloc_offset(); @@ -561,6 +561,8 @@ void __init bootx_init(unsigned long r3, unsigned long r4) * MMU switched OFF, so this should not be useful anymore. */ if (bi->version < 4) { + unsigned long x __maybe_unused; + bootx_printf("Touching pages...\n"); /* From a1fba0a39100549444ff4cc031ed927d3c0d462e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 24 May 2018 11:02:06 +0000 Subject: [PATCH 182/519] powerpc/8xx: fix invalid register expression in head_8xx.S [ Upstream commit e4ccb1dae6bdef228d729c076c38161ef6e7ca34 ] New binutils generate the following warning AS arch/powerpc/kernel/head_8xx.o arch/powerpc/kernel/head_8xx.S: Assembler messages: arch/powerpc/kernel/head_8xx.S:916: Warning: invalid register expression This patch fixes it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 78c1eba4c04a..01e274e6907b 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -720,7 +720,7 @@ start_here: tovirt(r6,r6) lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) From b6cfbfc127ca8b897b7434d52e3c67f2424695bd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 May 2018 21:07:12 +0200 Subject: [PATCH 183/519] pinctrl: at91-pio4: add missing of_node_put [ Upstream commit 21816364715f508c10da1e087e352bc1e326614f ] The device node iterators perform an of_node_get on each iteration, so a jump out of the loop requires an of_node_put. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ expression root,e; local idexpression child; iterator name for_each_child_of_node; @@ for_each_child_of_node(root, child) { ... when != of_node_put(child) when != e = child + of_node_put(child); ? break; ... } ... when != child // Signed-off-by: Julia Lawall Acked-by: Ludovic Desroches Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91-pio4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 271cca63e9bd..9aa82a4e9e25 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -568,8 +568,10 @@ static int atmel_pctl_dt_node_to_map(struct pinctrl_dev *pctldev, for_each_child_of_node(np_config, np) { ret = atmel_pctl_dt_subnode_to_map(pctldev, np, map, &reserved_maps, num_maps); - if (ret < 0) + if (ret < 0) { + of_node_put(np); break; + } } } From c51df1153c835b0b6b1b436e0394f0e76758bc59 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 23 May 2018 17:19:22 -0500 Subject: [PATCH 184/519] PCI: pciehp: Request control of native hotplug only if supported [ Upstream commit 408fec36a1ab3d14273c2116b449ef1e9be3cb8b ] Currently we request control of native PCIe hotplug unconditionally. Native PCIe hotplug events are handled by the pciehp driver, and if it is not enabled those events will be lost. Request control of native PCIe hotplug only if the pciehp driver is enabled, so we will actually handle native PCIe hotplug events. Suggested-by: Bjorn Helgaas Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/pci_root.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index ae3fe4e64203..3b0b4bd67b71 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -472,9 +472,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) } control = OSC_PCI_EXPRESS_CAPABILITY_CONTROL - | OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | OSC_PCI_EXPRESS_PME_CONTROL; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + control |= OSC_PCI_EXPRESS_NATIVE_HP_CONTROL; + if (pci_aer_available()) { if (aer_acpi_firmware_first()) dev_info(&device->dev, From 5c174f95e7ded3f492d05425a205420dad0f7926 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Fri, 18 May 2018 15:38:54 +0800 Subject: [PATCH 185/519] mwifiex: correct histogram data with appropriate index [ Upstream commit 30bfce0b63fa68c14ae1613eb9d259fa18644074 ] Correct snr/nr/rssi data index to avoid possible buffer underflow. Signed-off-by: Xinming Hu Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mwifiex/util.c b/drivers/net/wireless/mwifiex/util.c index 0cec8a64473e..eb5ffa5b1c6c 100644 --- a/drivers/net/wireless/mwifiex/util.c +++ b/drivers/net/wireless/mwifiex/util.c @@ -702,12 +702,14 @@ void mwifiex_hist_data_set(struct mwifiex_private *priv, u8 rx_rate, s8 snr, s8 nflr) { struct mwifiex_histogram_data *phist_data = priv->hist_data; + s8 nf = -nflr; + s8 rssi = snr - nflr; atomic_inc(&phist_data->num_samples); atomic_inc(&phist_data->rx_rate[rx_rate]); - atomic_inc(&phist_data->snr[snr]); - atomic_inc(&phist_data->noise_flr[128 + nflr]); - atomic_inc(&phist_data->sig_str[nflr - snr]); + atomic_inc(&phist_data->snr[snr + 128]); + atomic_inc(&phist_data->noise_flr[nf + 128]); + atomic_inc(&phist_data->sig_str[rssi + 128]); } /* function to reset histogram data during init/reset */ From 3bdb17c1bf1f9414065f997873d01cfee17dceee Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 3 May 2018 16:37:16 +0530 Subject: [PATCH 186/519] scsi: ufs: fix exception event handling [ Upstream commit 2e3611e9546c2ed4def152a51dfd34e8dddae7a5 ] The device can set the exception event bit in one of the response UPIU, for example to notify the need for urgent BKOPs operation. In such a case, the host driver calls ufshcd_exception_event_handler to handle this notification. When trying to check the exception event status (for finding the cause for the exception event), the device may be busy with additional SCSI commands handling and may not respond within the 100ms timeout. To prevent that, we need to block SCSI commands during handling of exception events and allow retransmissions of the query requests, in case of timeout. Signed-off-by: Subhash Jadavani Signed-off-by: Maya Erez Signed-off-by: Can Guo Signed-off-by: Asutosh Das Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 18f26cf1e24d..8c58adadb728 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3447,6 +3447,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work) hba = container_of(work, struct ufs_hba, eeh_work); pm_runtime_get_sync(hba->dev); + scsi_block_requests(hba->host); err = ufshcd_get_ee_status(hba, &status); if (err) { dev_err(hba->dev, "%s: failed to get exception status %d\n", @@ -3462,6 +3463,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work) __func__, err); } out: + scsi_unblock_requests(hba->host); pm_runtime_put_sync(hba->dev); return; } From c1da6e315e1b00867c0a0c900b3549c5f940aea9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 May 2018 20:02:23 +0200 Subject: [PATCH 187/519] ALSA: emu10k1: Rate-limit error messages about page errors [ Upstream commit 11d42c81036324697d367600bfc16f6dd37636fd ] The error messages at sanity checks of memory pages tend to repeat too many times once when it hits, and without the rate limit, it may flood and become unreadable. Replace such messages with the *_ratelimited() variant. Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1093027 Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index 4f1f69be1865..8c778fa33031 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -237,13 +237,13 @@ search_empty(struct snd_emu10k1 *emu, int size) static int is_valid_page(struct snd_emu10k1 *emu, dma_addr_t addr) { if (addr & ~emu->dma_mask) { - dev_err(emu->card->dev, + dev_err_ratelimited(emu->card->dev, "max memory size is 0x%lx (addr = 0x%lx)!!\n", emu->dma_mask, (unsigned long)addr); return 0; } if (addr & (EMUPAGESIZE-1)) { - dev_err(emu->card->dev, "page is not aligned\n"); + dev_err_ratelimited(emu->card->dev, "page is not aligned\n"); return 0; } return 1; @@ -334,7 +334,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst else addr = snd_pcm_sgbuf_get_addr(substream, ofs); if (! is_valid_page(emu, addr)) { - dev_err(emu->card->dev, + dev_err_ratelimited(emu->card->dev, "emu: failure page = %d\n", idx); mutex_unlock(&hdr->block_mutex); return NULL; From 9f543747d4b460b418fa29cb87d26258af96e6ae Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Thu, 17 May 2018 15:27:22 +0800 Subject: [PATCH 188/519] regulator: pfuze100: add .is_enable() for pfuze100_swb_regulator_ops [ Upstream commit 0b01fd3d40fe6402e5fa3b491ef23109feb1aaa5 ] If is_enabled() is not defined, regulator core will assume this regulator is already enabled, then it can NOT be really enabled after disabled. Based on Li Jun's patch from the NXP kernel tree. Signed-off-by: Anson Huang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/pfuze100-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 2a44e5dd9c2a..c68556bf6f39 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -152,6 +152,7 @@ static struct regulator_ops pfuze100_sw_regulator_ops = { static struct regulator_ops pfuze100_swb_regulator_ops = { .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .list_voltage = regulator_list_voltage_table, .map_voltage = regulator_map_voltage_ascend, .set_voltage_sel = regulator_set_voltage_sel_regmap, From c356cd64f01667cadf12d4fa0008164207bf533e Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 4 May 2018 18:08:10 +0800 Subject: [PATCH 189/519] md: fix NULL dereference of mddev->pers in remove_and_add_spares() [ Upstream commit c42a0e2675721e1444f56e6132a07b7b1ec169ac ] We met NULL pointer BUG as follow: [ 151.760358] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060 [ 151.761340] PGD 80000001011eb067 P4D 80000001011eb067 PUD 1011ea067 PMD 0 [ 151.762039] Oops: 0000 [#1] SMP PTI [ 151.762406] Modules linked in: [ 151.762723] CPU: 2 PID: 3561 Comm: mdadm-test Kdump: loaded Not tainted 4.17.0-rc1+ #238 [ 151.763542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 [ 151.764432] RIP: 0010:remove_and_add_spares.part.56+0x13c/0x3a0 [ 151.765061] RSP: 0018:ffffc90001d7fcd8 EFLAGS: 00010246 [ 151.765590] RAX: 0000000000000000 RBX: ffff88013601d600 RCX: 0000000000000000 [ 151.766306] RDX: 0000000000000000 RSI: ffff88013601d600 RDI: ffff880136187000 [ 151.767014] RBP: ffff880136187018 R08: 0000000000000003 R09: 0000000000000051 [ 151.767728] R10: ffffc90001d7fed8 R11: 0000000000000000 R12: ffff88013601d600 [ 151.768447] R13: ffff8801298b1300 R14: ffff880136187000 R15: 0000000000000000 [ 151.769160] FS: 00007f2624276700(0000) GS:ffff88013ae80000(0000) knlGS:0000000000000000 [ 151.769971] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 151.770554] CR2: 0000000000000060 CR3: 0000000111aac000 CR4: 00000000000006e0 [ 151.771272] Call Trace: [ 151.771542] md_ioctl+0x1df2/0x1e10 [ 151.771906] ? __switch_to+0x129/0x440 [ 151.772295] ? __schedule+0x244/0x850 [ 151.772672] blkdev_ioctl+0x4bd/0x970 [ 151.773048] block_ioctl+0x39/0x40 [ 151.773402] do_vfs_ioctl+0xa4/0x610 [ 151.773770] ? dput.part.23+0x87/0x100 [ 151.774151] ksys_ioctl+0x70/0x80 [ 151.774493] __x64_sys_ioctl+0x16/0x20 [ 151.774877] do_syscall_64+0x5b/0x180 [ 151.775258] entry_SYSCALL_64_after_hwframe+0x44/0xa9 For raid6, when two disk of the array are offline, two spare disks can be added into the array. Before spare disks recovery completing, system reboot and mdadm thinks it is ok to restart the degraded array by md_ioctl(). Since disks in raid6 is not only_parity(), raid5_run() will abort, when there is no PPL feature or not setting 'start_dirty_degraded' parameter. Therefore, mddev->pers is NULL. But, mddev->raid_disks has been set and it will not be cleared when raid5_run abort. md_ioctl() can execute cmd 'HOT_REMOVE_DISK' to remove a disk by mdadm, which will cause NULL pointer dereference in remove_and_add_spares() finally. Signed-off-by: Yufen Yu Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0663463df2f7..07f307402351 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6145,6 +6145,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev) struct md_rdev *rdev; int ret = -1; + if (!mddev->pers) + return -ENODEV; + rdev = find_rdev(mddev, dev); if (!rdev) return -ENXIO; From 199b0431d563fd22ded0455e8bf4cd28c839fe7d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 25 Apr 2018 11:04:21 -0400 Subject: [PATCH 190/519] media: smiapp: fix timeout checking in smiapp_read_nvm [ Upstream commit 7a2148dfda8001c983f0effd9afd8a7fa58e99c4 ] The current code decrements the timeout counter i and the end of each loop i is incremented, so the check for timeout will always be false and hence the timeout mechanism is just a dead code path. Potentially, if the RD_READY bit is not set, we could end up in an infinite loop. Fix this so the timeout starts from 1000 and decrements to zero, if at the end of the loop i is zero we have a timeout condition. Detected by CoverityScan, CID#1324008 ("Logically dead code") Fixes: ccfc97bdb5ae ("[media] smiapp: Add driver") Signed-off-by: Colin Ian King Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/smiapp/smiapp-core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index fb39dfd55e75..46a052c5be2e 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -981,7 +981,7 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor, if (rval) goto out; - for (i = 0; i < 1000; i++) { + for (i = 1000; i > 0; i--) { rval = smiapp_read( sensor, SMIAPP_REG_U8_DATA_TRANSFER_IF_1_STATUS, &s); @@ -992,11 +992,10 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor, if (s & SMIAPP_DATA_TRANSFER_IF_1_STATUS_RD_READY) break; - if (--i == 0) { - rval = -ETIMEDOUT; - goto out; - } - + } + if (!i) { + rval = -ETIMEDOUT; + goto out; } for (i = 0; i < SMIAPP_NVM_PAGE_SIZE; i++) { From c8306ac3b5fd3b60ccc06f2c52645ad9f58b5260 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 16 May 2018 20:07:18 +0200 Subject: [PATCH 191/519] ALSA: usb-audio: Apply rate limit to warning messages in URB complete callback [ Upstream commit 377a879d9832f4ba69bd6a1fc996bb4181b1e504 ] retire_capture_urb() may print warning messages when the given URB doesn't align, and this may flood the system log easily. Put the rate limit to the message for avoiding it. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1093485 Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 8e8db4ddf365..a9079654107c 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1300,7 +1300,7 @@ static void retire_capture_urb(struct snd_usb_substream *subs, if (bytes % (runtime->sample_bits >> 3) != 0) { int oldbytes = bytes; bytes = frames * stride; - dev_warn(&subs->dev->dev, + dev_warn_ratelimited(&subs->dev->dev, "Corrected urb data len. %d->%d\n", oldbytes, bytes); } From f12b01b896a506a5ad44120b2315f3018c2ea1e0 Mon Sep 17 00:00:00 2001 From: Terry Junge Date: Mon, 30 Apr 2018 13:32:46 -0700 Subject: [PATCH 192/519] HID: hid-plantronics: Re-resend Update to map button for PTT products [ Upstream commit 37e376df5f4993677c33968a0c19b0c5acbf1108 ] Add a mapping for Push-To-Talk joystick trigger button. Tested on ChromeBox/ChromeBook with various Plantronics devices. Signed-off-by: Terry Junge Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-plantronics.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-plantronics.c b/drivers/hid/hid-plantronics.c index febb21ee190e..584b10d3fc3d 100644 --- a/drivers/hid/hid-plantronics.c +++ b/drivers/hid/hid-plantronics.c @@ -2,7 +2,7 @@ * Plantronics USB HID Driver * * Copyright (c) 2014 JD Cole - * Copyright (c) 2015 Terry Junge + * Copyright (c) 2015-2018 Terry Junge */ /* @@ -48,6 +48,10 @@ static int plantronics_input_mapping(struct hid_device *hdev, unsigned short mapped_key; unsigned long plt_type = (unsigned long)hid_get_drvdata(hdev); + /* special case for PTT products */ + if (field->application == HID_GD_JOYSTICK) + goto defaulted; + /* handle volume up/down mapping */ /* non-standard types or multi-HID interfaces - plt_type is PID */ if (!(plt_type & HID_USAGE_PAGE)) { From 1f5e33948005cd1b720fd58717bb971109432875 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:15:13 +0200 Subject: [PATCH 193/519] drm/radeon: fix mode_valid's return type [ Upstream commit 7a47f20eb1fb8fa8d7a8fe3a4fd8c721f04c2174 ] The method struct drm_connector_helper_funcs::mode_valid is defined as returning an 'enum drm_mode_status' but the driver implementation for this method uses an 'int' for it. Fix this by using 'enum drm_mode_status' in the driver too. Signed-off-by: Luc Van Oostenryck Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_connectors.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 1a2a7365d0b5..c6bf378534f8 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -844,7 +844,7 @@ static int radeon_lvds_get_modes(struct drm_connector *connector) return ret; } -static int radeon_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -993,7 +993,7 @@ static int radeon_vga_get_modes(struct drm_connector *connector) return ret; } -static int radeon_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1136,7 +1136,7 @@ static int radeon_tv_get_modes(struct drm_connector *connector) return 1; } -static int radeon_tv_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if ((mode->hdisplay > 1024) || (mode->vdisplay > 768)) @@ -1477,7 +1477,7 @@ static void radeon_dvi_force(struct drm_connector *connector) radeon_connector->use_digital = true; } -static int radeon_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1778,7 +1778,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force) return ret; } -static int radeon_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; From 6a409759c64a8eeac8ccb23dcbc3c89241f2455c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Thu, 10 May 2018 23:59:19 +0200 Subject: [PATCH 194/519] powerpc/embedded6xx/hlwd-pic: Prevent interrupts from being handled by Starlet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9dcb3df4281876731e4e8bff7940514d72375154 ] The interrupt controller inside the Wii's Hollywood chip is connected to two masters, the "Broadway" PowerPC and the "Starlet" ARM926, each with their own interrupt status and mask registers. When booting the Wii with mini[1], interrupts from the SD card controller (IRQ 7) are handled by the ARM, because mini provides SD access over IPC. Linux however can't currently use or disable this IPC service, so both sides try to handle IRQ 7 without coordination. Let's instead make sure that all interrupts that are unmasked on the PPC side are masked on the ARM side; this will also make sure that Linux can properly talk to the SD card controller (and potentially other devices). If access to a device through IPC is desired in the future, interrupts from that device should not be handled by Linux directly. [1]: https://github.com/lewurm/mini Signed-off-by: Jonathan Neuschäfer Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 9b7975706bfc..9485f1024d46 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -35,6 +35,8 @@ */ #define HW_BROADWAY_ICR 0x00 #define HW_BROADWAY_IMR 0x04 +#define HW_STARLET_ICR 0x08 +#define HW_STARLET_IMR 0x0c /* @@ -74,6 +76,9 @@ static void hlwd_pic_unmask(struct irq_data *d) void __iomem *io_base = irq_data_get_irq_chip_data(d); setbits32(io_base + HW_BROADWAY_IMR, 1 << irq); + + /* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */ + clrbits32(io_base + HW_STARLET_IMR, 1 << irq); } From 29f8cd10e537dc39254e4fcf36decf43529e6986 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 9 May 2018 12:12:15 -0700 Subject: [PATCH 195/519] HID: i2c-hid: check if device is there before really probing [ Upstream commit b3a81b6c4fc6730ac49e20d789a93c0faabafc98 ] On many Chromebooks touch devices are multi-sourced; the components are electrically compatible and one can be freely swapped for another without changing the OS image or firmware. To avoid bunch of scary messages when device is not actually present in the system let's try testing basic communication with it and if there is no response terminate probe early with -ENXIO. Signed-off-by: Dmitry Torokhov Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index a5fed668fde1..4248d253c32a 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -1017,6 +1017,14 @@ static int i2c_hid_probe(struct i2c_client *client, pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); + /* Make sure there is something at this address */ + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_dbg(&client->dev, "nothing at this address: %d\n", ret); + ret = -ENXIO; + goto err_pm; + } + ret = i2c_hid_fetch_hid_descriptor(ihid); if (ret < 0) goto err_pm; From 6dbfa9b5ae65063cd61dc7fa11332e00bb794d8b Mon Sep 17 00:00:00 2001 From: DaeRyong Jeong Date: Tue, 1 May 2018 00:27:04 +0900 Subject: [PATCH 196/519] tty: Fix data race in tty_insert_flip_string_fixed_flag [ Upstream commit b6da31b2c07c46f2dcad1d86caa835227a16d9ff ] Unlike normal serials, in pty layer, there is no guarantee that multiple threads don't insert input characters at the same time. If it is happened, tty_insert_flip_string_fixed_flag can be executed concurrently. This can lead slab out-of-bounds write in tty_insert_flip_string_fixed_flag. Call sequences are as follows. CPU0 CPU1 n_tty_ioctl_helper n_tty_ioctl_helper __start_tty tty_send_xchar tty_wakeup pty_write n_hdlc_tty_wakeup tty_insert_flip_string n_hdlc_send_frames tty_insert_flip_string_fixed_flag pty_write tty_insert_flip_string tty_insert_flip_string_fixed_flag To fix the race, acquire port->lock in pty_write() before it inserts input characters to tty buffer. It prevents multiple threads from inserting input characters concurrently. The crash log is as follows: BUG: KASAN: slab-out-of-bounds in tty_insert_flip_string_fixed_flag+0xb5/ 0x130 drivers/tty/tty_buffer.c:316 at addr ffff880114fcc121 Write of size 1792 by task syz-executor0/30017 CPU: 1 PID: 30017 Comm: syz-executor0 Not tainted 4.8.0 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 0000000000000000 ffff88011638f888 ffffffff81694cc3 ffff88007d802140 ffff880114fcb300 ffff880114fcc300 ffff880114fcb300 ffff88011638f8b0 ffffffff8130075c ffff88011638f940 ffff88007d802140 ffff880194fcc121 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0xb3/0x110 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 print_address_description mm/kasan/report.c:194 [inline] kasan_report_error+0x1f7/0x4e0 mm/kasan/report.c:283 kasan_report+0x36/0x40 mm/kasan/report.c:303 check_memory_region_inline mm/kasan/kasan.c:292 [inline] check_memory_region+0x13e/0x1a0 mm/kasan/kasan.c:299 memcpy+0x37/0x50 mm/kasan/kasan.c:335 tty_insert_flip_string_fixed_flag+0xb5/0x130 drivers/tty/tty_buffer.c:316 tty_insert_flip_string include/linux/tty_flip.h:35 [inline] pty_write+0x7f/0xc0 drivers/tty/pty.c:115 n_hdlc_send_frames+0x1d4/0x3b0 drivers/tty/n_hdlc.c:419 n_hdlc_tty_wakeup+0x73/0xa0 drivers/tty/n_hdlc.c:496 tty_wakeup+0x92/0xb0 drivers/tty/tty_io.c:601 __start_tty.part.26+0x66/0x70 drivers/tty/tty_io.c:1018 __start_tty+0x34/0x40 drivers/tty/tty_io.c:1013 n_tty_ioctl_helper+0x146/0x1e0 drivers/tty/tty_ioctl.c:1138 n_hdlc_tty_ioctl+0xb3/0x2b0 drivers/tty/n_hdlc.c:794 tty_ioctl+0xa85/0x16d0 drivers/tty/tty_io.c:2992 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x13e/0xba0 fs/ioctl.c:679 SYSC_ioctl fs/ioctl.c:694 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685 entry_SYSCALL_64_fastpath+0x1f/0xbd Signed-off-by: DaeRyong Jeong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 96aa0ad32497..c8a2e5b0eff7 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -106,16 +106,19 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; + unsigned long flags; if (tty->stopped) return 0; if (c > 0) { + spin_lock_irqsave(&to->port->lock, flags); /* Stuff the data into the input queue of the other end */ c = tty_insert_flip_string(to->port, buf, c); /* And shovel */ if (c) tty_flip_buffer_push(to->port); + spin_unlock_irqrestore(&to->port->lock, flags); } return c; } From 7a9a331f0a5ca9434b3284a9a081c491b9075615 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 May 2018 13:14:33 +0100 Subject: [PATCH 197/519] dma-iommu: Fix compilation when !CONFIG_IOMMU_DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8a22a3e1e768c309b718f99bd86f9f25a453e0dc ] Inclusion of include/dma-iommu.h when CONFIG_IOMMU_DMA is not selected results in the following splat: In file included from drivers/irqchip/irq-gic-v3-mbi.c:20:0: ./include/linux/dma-iommu.h:95:69: error: unknown type name ‘dma_addr_t’ static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) ^~~~~~~~~~ ./include/linux/dma-iommu.h:108:74: warning: ‘struct list_head’ declared inside parameter list will not be visible outside of this definition or declaration static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) ^~~~~~~~~ scripts/Makefile.build:312: recipe for target 'drivers/irqchip/irq-gic-v3-mbi.o' failed Fix it by including linux/types.h. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Rob Herring Cc: Jason Cooper Cc: Ard Biesheuvel Cc: Srinivas Kandagatla Cc: Thomas Petazzoni Cc: Miquel Raynal Link: https://lkml.kernel.org/r/20180508121438.11301-5-marc.zyngier@arm.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/dma-iommu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index fc481037478a..19baa7f4f403 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -17,6 +17,7 @@ #define __DMA_IOMMU_H #ifdef __KERNEL__ +#include #include #ifdef CONFIG_IOMMU_DMA From 09b8746fd4fb430c2dfcd257b8649bf6754df979 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Jul 2016 07:21:46 -0400 Subject: [PATCH 198/519] media: rcar_jpu: Add missing clk_disable_unprepare() on error in jpu_open() [ Upstream commit 43d0d3c52787df0221d1c52494daabd824fe84f1 ] Add the missing clk_disable_unprepare() before return from jpu_open() in the software reset error handling case. Signed-off-by: Wei Yongjun Acked-by: Mikhail Ulyanov Reviewed-by: Kieran Bingham Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/rcar_jpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c index f8e3e83c52a2..20de5e9fc217 100644 --- a/drivers/media/platform/rcar_jpu.c +++ b/drivers/media/platform/rcar_jpu.c @@ -1278,7 +1278,7 @@ static int jpu_open(struct file *file) /* ...issue software reset */ ret = jpu_reset(jpu); if (ret) - goto device_prepare_rollback; + goto jpu_reset_rollback; } jpu->ref_count++; @@ -1286,6 +1286,8 @@ static int jpu_open(struct file *file) mutex_unlock(&jpu->mutex); return 0; +jpu_reset_rollback: + clk_disable_unprepare(jpu->clk); device_prepare_rollback: mutex_unlock(&jpu->mutex); v4l_prepare_rollback: From cdebef38f64805a1802452acadd33e690bd7c503 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 May 2018 09:28:12 +0900 Subject: [PATCH 199/519] libata: Fix command retry decision [ Upstream commit 804689ad2d9b66d0d3920b48cf05881049d44589 ] For failed commands with valid sense data (e.g. NCQ commands), scsi_check_sense() is used in ata_analyze_tf() to determine if the command can be retried. In such case, rely on this decision and ignore the command error mask based decision done in ata_worth_retry(). This fixes useless retries of commands such as unaligned writes on zoned disks (TYPE_ZAC). Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 75cced210b2a..7db76b5c7ada 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2198,12 +2198,16 @@ static void ata_eh_link_autopsy(struct ata_link *link) if (qc->err_mask & ~AC_ERR_OTHER) qc->err_mask &= ~AC_ERR_OTHER; - /* SENSE_VALID trumps dev/unknown error and revalidation */ + /* + * SENSE_VALID trumps dev/unknown error and revalidation. Upper + * layers will determine whether the command is worth retrying + * based on the sense data and device class/type. Otherwise, + * determine directly if the command is worth retrying using its + * error mask and flags. + */ if (qc->flags & ATA_QCFLAG_SENSE_VALID) qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); - - /* determine whether the command is worth retrying */ - if (ata_eh_worth_retry(qc)) + else if (ata_eh_worth_retry(qc)) qc->flags |= ATA_QCFLAG_RETRY; /* accumulate error info */ From 1fd65d6a00853204652489fbe37278d3771638f8 Mon Sep 17 00:00:00 2001 From: Brad Love Date: Fri, 4 May 2018 17:53:35 -0400 Subject: [PATCH 200/519] media: saa7164: Fix driver name in debug output [ Upstream commit 0cc4655cb57af0b7e105d075c4f83f8046efafe7 ] This issue was reported by a user who downloaded a corrupt saa7164 firmware, then went looking for a valid xc5000 firmware to fix the error displayed...but the device in question has no xc5000, thus after much effort, the wild goose chase eventually led to a support call. The xc5000 has nothing to do with saa7164 (as far as I can tell), so replace the string with saa7164 as well as give a meaningful hint on the firmware mismatch. Signed-off-by: Brad Love Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7164/saa7164-fw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/pci/saa7164/saa7164-fw.c b/drivers/media/pci/saa7164/saa7164-fw.c index 269e0782c7b6..93d53195e8ca 100644 --- a/drivers/media/pci/saa7164/saa7164-fw.c +++ b/drivers/media/pci/saa7164/saa7164-fw.c @@ -430,7 +430,8 @@ int saa7164_downloadfirmware(struct saa7164_dev *dev) __func__, fw->size); if (fw->size != fwlength) { - printk(KERN_ERR "xc5000: firmware incorrect size\n"); + printk(KERN_ERR "saa7164: firmware incorrect size %zu != %u\n", + fw->size, fwlength); ret = -ENOMEM; goto out; } From b43d01323cf16432db5303dad3e5dfe0271c9240 Mon Sep 17 00:00:00 2001 From: Jane Wan Date: Tue, 8 May 2018 14:19:53 -0700 Subject: [PATCH 201/519] mtd: rawnand: fsl_ifc: fix FSL NAND driver to read all ONFI parameter pages [ Upstream commit a75bbe71a27875fdc61cde1af6d799037cef6bed ] Per ONFI specification (Rev. 4.0), if the CRC of the first parameter page read is not valid, the host should read redundant parameter page copies. Fix FSL NAND driver to read the two redundant copies which are mandatory in the specification. Signed-off-by: Jane Wan Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 5e3fa5861039..2c0bbaed3609 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -449,9 +449,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, case NAND_CMD_READID: case NAND_CMD_PARAM: { + /* + * For READID, read 8 bytes that are currently used. + * For PARAM, read all 3 copies of 256-bytes pages. + */ + int len = 8; int timing = IFC_FIR_OP_RB; - if (command == NAND_CMD_PARAM) + if (command == NAND_CMD_PARAM) { timing = IFC_FIR_OP_RBCD; + len = 256 * 3; + } ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | @@ -461,12 +468,8 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, &ifc->ifc_nand.nand_fcr0); ifc_out32(column, &ifc->ifc_nand.row3); - /* - * although currently it's 8 bytes for READID, we always read - * the maximum 256 bytes(for PARAM) - */ - ifc_out32(256, &ifc->ifc_nand.nand_fbcr); - ifc_nand_ctrl->read_bytes = 256; + ifc_out32(len, &ifc->ifc_nand.nand_fbcr); + ifc_nand_ctrl->read_bytes = len; set_addr(mtd, 0, 0, 0); fsl_ifc_run_command(mtd); From e97509a1b36ccb5e0b4743b4b5168c8bdb9df52c Mon Sep 17 00:00:00 2001 From: Sean Lanigan Date: Fri, 4 May 2018 16:48:23 +1000 Subject: [PATCH 202/519] brcmfmac: Add support for bcm43364 wireless chipset [ Upstream commit 9c4a121e82634aa000a702c98cd6f05b27d6e186 ] Add support for the BCM43364 chipset via an SDIO interface, as used in e.g. the Murata 1FX module. The BCM43364 uses the same firmware as the BCM43430 (which is already included), the only difference is the omission of Bluetooth. However, the SDIO_ID for the BCM43364 is 02D0:A9A4, giving it a MODALIAS of sdio:c00v02D0dA9A4, which doesn't get recognised and hence doesn't load the brcmfmac module. Adding the 'A9A4' ID in the appropriate place triggers the brcmfmac driver to load, and then correctly use the firmware file 'brcmfmac43430-sdio.bin'. Signed-off-by: Sean Lanigan Acked-by: Ulf Hansson Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 1 + include/linux/mmc/sdio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index 59cef6c69fe8..91da67657f81 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -1109,6 +1109,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43340), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345), diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 83430f2ea757..e0325706b76d 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 From 36ba2c8f617686ccfe1b0a9337ed3f662ce891fb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 8 May 2018 10:18:39 +0200 Subject: [PATCH 203/519] s390/cpum_sf: Add data entry sizes to sampling trailer entry [ Upstream commit 77715b7ddb446bd39a06f3376e85f4bb95b29bb8 ] The CPU Measurement sampling facility creates a trailer entry for each Sample-Data-Block of stored samples. The trailer entry contains the sizes (in bytes) of the stored sampling types: - basic-sampling data entry size - diagnostic-sampling data entry size Both sizes are 2 bytes long. This patch changes the trailer entry definition to reflect this. Fixes: fcc77f507333 ("s390/cpum_sf: Atomically reset trailer entry fields of sample-data-blocks") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/cpu_mf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 9dd04b9e9782..80c0b9c9c3b2 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -129,7 +129,9 @@ struct hws_trailer_entry { unsigned int f:1; /* 0 - Block Full Indicator */ unsigned int a:1; /* 1 - Alert request control */ unsigned int t:1; /* 2 - Timestamp format */ - unsigned long long:61; /* 3 - 63: Reserved */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ }; unsigned long long flags; /* 0 - 63: All indicators */ }; From c4837ace416459c3f7394d73137ed0bb02869939 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 8 May 2018 07:53:39 +0200 Subject: [PATCH 204/519] perf: fix invalid bit in diagnostic entry [ Upstream commit 3c0a83b14ea71fef5ccc93a3bd2de5f892be3194 ] The s390 CPU measurement facility sampling mode supports basic entries and diagnostic entries. Each entry has a valid bit to indicate the status of the entry as valid or invalid. This bit is bit 31 in the diagnostic entry, but the bit mask definition refers to bit 30. Fix this by making the reserved field one bit larger. Fixes: 7e75fc3ff4cf ("s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/cpu_mf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 80c0b9c9c3b2..b2f8c52b3840 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -113,7 +113,7 @@ struct hws_basic_entry { struct hws_diag_entry { unsigned int def:16; /* 0-15 Data Entry Format */ - unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int R:15; /* 16-19 and 20-30 reserved */ unsigned int I:1; /* 31 entry valid or invalid */ u8 data[]; /* Machine-dependent sample data */ } __packed; From 092b0288f150e17ed626079685a04318abbfbd81 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 7 May 2018 19:46:43 -0500 Subject: [PATCH 205/519] scsi: 3w-9xxx: fix a missing-check bug [ Upstream commit c9318a3e0218bc9dacc25be46b9eec363259536f ] In twa_chrdev_ioctl(), the ioctl driver command is firstly copied from the userspace pointer 'argp' and saved to the kernel object 'driver_command'. Then a security check is performed on the data buffer size indicated by 'driver_command', which is 'driver_command.buffer_length'. If the security check is passed, the entire ioctl command is copied again from the 'argp' pointer and saved to the kernel object 'tw_ioctl'. Then, various operations are performed on 'tw_ioctl' according to the 'cmd'. Given that the 'argp' pointer resides in userspace, a malicious userspace process can race to change the buffer size between the two copies. This way, the user can bypass the security check and inject invalid data buffer size. This can cause potential security issues in the following execution. This patch checks for capable(CAP_SYS_ADMIN) in twa_chrdev_open()t o avoid the above issues. Signed-off-by: Wenwen Wang Acked-by: Adam Radford Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-9xxx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index a56a7b243e91..5466246c69b4 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -889,6 +889,11 @@ static int twa_chrdev_open(struct inode *inode, struct file *file) unsigned int minor_number; int retval = TW_IOCTL_ERROR_OS_ENODEV; + if (!capable(CAP_SYS_ADMIN)) { + retval = -EACCES; + goto out; + } + minor_number = iminor(inode); if (minor_number >= twa_device_extension_count) goto out; From 2c29ed33045b82322518452b20b9b2ff9ebec2f7 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 7 May 2018 19:54:01 -0500 Subject: [PATCH 206/519] scsi: 3w-xxxx: fix a missing-check bug [ Upstream commit 9899e4d3523faaef17c67141aa80ff2088f17871 ] In tw_chrdev_ioctl(), the length of the data buffer is firstly copied from the userspace pointer 'argp' and saved to the kernel object 'data_buffer_length'. Then a security check is performed on it to make sure that the length is not more than 'TW_MAX_IOCTL_SECTORS * 512'. Otherwise, an error code -EINVAL is returned. If the security check is passed, the entire ioctl command is copied again from the 'argp' pointer and saved to the kernel object 'tw_ioctl'. Then, various operations are performed on 'tw_ioctl' according to the 'cmd'. Given that the 'argp' pointer resides in userspace, a malicious userspace process can race to change the buffer length between the two copies. This way, the user can bypass the security check and inject invalid data buffer length. This can cause potential security issues in the following execution. This patch checks for capable(CAP_SYS_ADMIN) in tw_chrdev_open() to avoid the above issues. Signed-off-by: Wenwen Wang Acked-by: Adam Radford Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-xxxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index 2940bd769936..14af38036287 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -1034,6 +1034,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file) dprintk(KERN_WARNING "3w-xxxx: tw_ioctl_open()\n"); + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + minor_number = iminor(inode); if (minor_number >= tw_device_extension_count) return -ENODEV; From 4005ecaf46f28f89bf58ec28f7f7997a6fe15ecd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 May 2018 13:54:32 +0300 Subject: [PATCH 207/519] scsi: megaraid: silence a static checker bug [ Upstream commit 27e833dabab74ee665e487e291c9afc6d71effba ] If we had more than 32 megaraid cards then it would cause memory corruption. That's not likely, of course, but it's handy to enforce it and make the static checker happy. Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 9d05302a3bcd..19bffe0b2cc0 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4197,6 +4197,9 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) int irq, i, j; int error = -ENODEV; + if (hba_count >= MAX_CONTROLLERS) + goto out; + if (pci_enable_device(pdev)) goto out; pci_set_master(pdev); From b9229445a4ae6237150d86a095586c094a1b967c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Apr 2018 13:51:16 +0200 Subject: [PATCH 208/519] thermal: exynos: fix setting rising_threshold for Exynos5433 [ Upstream commit 8bfc218d0ebbabcba8ed2b8ec1831e0cf1f71629 ] Add missing clearing of the previous value when setting rising temperature threshold. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/samsung/exynos_tmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index fa61eff88496..16d45a25284f 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -585,6 +585,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev) threshold_code = temp_to_code(data, temp); rising_threshold = readl(data->base + rising_reg_offset); + rising_threshold &= ~(0xff << j * 8); rising_threshold |= (threshold_code << j * 8); writel(rising_threshold, data->base + rising_reg_offset); From b23dab51e987787e358397b24831505668625b8a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 May 2018 18:37:17 -0700 Subject: [PATCH 209/519] bpf: fix references to free_bpf_prog_info() in comments [ Upstream commit ab7f5bf0928be2f148d000a6eaa6c0a36e74750e ] Comments in the verifier refer to free_bpf_prog_info() which seems to have never existed in tree. Replace it with free_used_maps(). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 79e3c21a35d0..35dfa9e9d69e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2101,7 +2101,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded - * and all maps are released in free_bpf_prog_info() + * and all maps are released in free_used_maps() */ map = bpf_map_inc(map, false); if (IS_ERR(map)) { @@ -2487,7 +2487,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) vfree(log_buf); if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release - * them now. Otherwise free_bpf_prog_info() will release them. + * them now. Otherwise free_used_maps() will release them. */ release_maps(env); *prog = env->prog; From 3d4a1e1cf348b5afab54d5903769f8eb5fe8e830 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 20 Apr 2018 08:32:16 -0400 Subject: [PATCH 210/519] media: siano: get rid of __le32/__le16 cast warnings [ Upstream commit e1b7f11b37def5f3021c06e8c2b4953e099357aa ] Those are all false-positives that appear with smatch when building for arm: drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16 Get rid of them by adding explicit forced casts. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/siano/smsendian.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/media/common/siano/smsendian.c b/drivers/media/common/siano/smsendian.c index bfe831c10b1c..b95a631f23f9 100644 --- a/drivers/media/common/siano/smsendian.c +++ b/drivers/media/common/siano/smsendian.c @@ -35,7 +35,7 @@ void smsendian_handle_tx_message(void *buffer) switch (msg->x_msg_header.msg_type) { case MSG_SMS_DATA_DOWNLOAD_REQ: { - msg->msg_data[0] = le32_to_cpu(msg->msg_data[0]); + msg->msg_data[0] = le32_to_cpu((__force __le32)(msg->msg_data[0])); break; } @@ -44,7 +44,7 @@ void smsendian_handle_tx_message(void *buffer) sizeof(struct sms_msg_hdr))/4; for (i = 0; i < msg_words; i++) - msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]); + msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]); break; } @@ -64,7 +64,7 @@ void smsendian_handle_rx_message(void *buffer) { struct sms_version_res *ver = (struct sms_version_res *) msg; - ver->chip_model = le16_to_cpu(ver->chip_model); + ver->chip_model = le16_to_cpu((__force __le16)ver->chip_model); break; } @@ -81,7 +81,7 @@ void smsendian_handle_rx_message(void *buffer) sizeof(struct sms_msg_hdr))/4; for (i = 0; i < msg_words; i++) - msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]); + msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]); break; } @@ -95,9 +95,9 @@ void smsendian_handle_message_header(void *msg) #ifdef __BIG_ENDIAN struct sms_msg_hdr *phdr = (struct sms_msg_hdr *)msg; - phdr->msg_type = le16_to_cpu(phdr->msg_type); - phdr->msg_length = le16_to_cpu(phdr->msg_length); - phdr->msg_flags = le16_to_cpu(phdr->msg_flags); + phdr->msg_type = le16_to_cpu((__force __le16)phdr->msg_type); + phdr->msg_length = le16_to_cpu((__force __le16)phdr->msg_length); + phdr->msg_flags = le16_to_cpu((__force __le16)phdr->msg_flags); #endif /* __BIG_ENDIAN */ } EXPORT_SYMBOL_GPL(smsendian_handle_message_header); From f1a64c117f1363f17cfc7e5bd410ec6222031501 Mon Sep 17 00:00:00 2001 From: Satendra Singh Thakur Date: Thu, 3 May 2018 11:19:32 +0530 Subject: [PATCH 211/519] drm/atomic: Handling the case when setting old crtc for plane [ Upstream commit fc2a69f3903dfd97cd47f593e642b47918c949df ] In the func drm_atomic_set_crtc_for_plane, with the current code, if crtc of the plane_state and crtc passed as argument to the func are same, entire func will executed in vein. It will get state of crtc and clear and set the bits in plane_mask. All these steps are not required for same old crtc. Ideally, we should do nothing in this case, this patch handles the same, and causes the program to return without doing anything in such scenario. Signed-off-by: Satendra Singh Thakur Cc: Madhur Verma Cc: Hemanshu Srivastava Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1525326572-25854-1-git-send-email-satendra.t@samsung.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 50d74e5ce41b..355ad1b97df6 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -960,7 +960,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for same crtc*/ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); From 0743d20878ddec1f959d0fbb419cb4dc2ebfc374 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 2 May 2018 22:48:16 +0900 Subject: [PATCH 212/519] ALSA: hda/ca0132: fix build failure when a local macro is defined [ Upstream commit 8e142e9e628975b0dddd05cf1b095331dff6e2de ] DECLARE_TLV_DB_SCALE (alias of SNDRV_CTL_TLVD_DECLARE_DB_SCALE) is used but tlv.h is not included. This causes build failure when local macro is defined by comment-out. This commit fixes the bug. At the same time, the alias macro is replaced with a destination macro added at a commit 46e860f76804 ("ALSA: rename TLV-related macros so that they're friendly to user applications") Reported-by: Connor McAdams Fixes: 44f0c9782cc6 ('ALSA: hda/ca0132: Add tuning controls') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 29e1ce2263bc..c55c0131be0a 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -38,6 +38,10 @@ /* Enable this to see controls for tuning purpose. */ /*#define ENABLE_TUNING_CONTROLS*/ +#ifdef ENABLE_TUNING_CONTROLS +#include +#endif + #define FLOAT_ZERO 0x00000000 #define FLOAT_ONE 0x3f800000 #define FLOAT_TWO 0x40000000 @@ -3067,8 +3071,8 @@ static int equalizer_ctl_put(struct snd_kcontrol *kcontrol, return 1; } -static const DECLARE_TLV_DB_SCALE(voice_focus_db_scale, 2000, 100, 0); -static const DECLARE_TLV_DB_SCALE(eq_db_scale, -2400, 100, 0); +static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(voice_focus_db_scale, 2000, 100, 0); +static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(eq_db_scale, -2400, 100, 0); static int add_tuning_control(struct hda_codec *codec, hda_nid_t pnid, hda_nid_t nid, From 4ea02c98bf3bb86356b460fe42c5c86f932fc3ce Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Apr 2018 22:28:27 +0300 Subject: [PATCH 213/519] memory: tegra: Do not handle spurious interrupts [ Upstream commit bf3fbdfbec947cdd04b2f2c4bce11534c8786eee ] The ISR reads interrupts-enable mask, but doesn't utilize it. Apply the mask to the interrupt status and don't handle interrupts that MC driver haven't asked for. Kernel would disable spurious MC IRQ and report the error. This would happen only in a case of a very severe bug. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/memory/tegra/mc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index a1ae0cc2b86d..13dcee4ed0cf 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -252,8 +252,11 @@ static irqreturn_t tegra_mc_irq(int irq, void *data) unsigned int bit; /* mask all interrupts to avoid flooding */ - status = mc_readl(mc, MC_INTSTATUS); mask = mc_readl(mc, MC_INTMASK); + status = mc_readl(mc, MC_INTSTATUS) & mask; + + if (!status) + return IRQ_NONE; for_each_set_bit(bit, &status, 32) { const char *error = status_names[bit] ?: "unknown"; From e55c920c902705ac7e8d4ee565192fbb3dd251ae Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Apr 2018 22:28:29 +0300 Subject: [PATCH 214/519] memory: tegra: Apply interrupts mask per SoC [ Upstream commit 1c74d5c0de0c2cc29fef97a19251da2ad6f579bd ] Currently we are enabling handling of interrupts specific to Tegra124+ which happen to overlap with previous generations. Let's specify interrupts mask per SoC generation for consistency and in a preparation of squashing of Tegra20 driver into the common one that will enable handling of GART faults which may be undesirable by newer generations. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/memory/tegra/mc.c | 21 +++------------------ drivers/memory/tegra/mc.h | 9 +++++++++ drivers/memory/tegra/tegra114.c | 2 ++ drivers/memory/tegra/tegra124.c | 6 ++++++ drivers/memory/tegra/tegra210.c | 3 +++ drivers/memory/tegra/tegra30.c | 2 ++ include/soc/tegra/mc.h | 2 ++ 7 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 13dcee4ed0cf..6ab481ee8ece 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -20,14 +20,6 @@ #include "mc.h" #define MC_INTSTATUS 0x000 -#define MC_INT_DECERR_MTS (1 << 16) -#define MC_INT_SECERR_SEC (1 << 13) -#define MC_INT_DECERR_VPR (1 << 12) -#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11) -#define MC_INT_INVALID_SMMU_PAGE (1 << 10) -#define MC_INT_ARBITRATION_EMEM (1 << 9) -#define MC_INT_SECURITY_VIOLATION (1 << 8) -#define MC_INT_DECERR_EMEM (1 << 6) #define MC_INTMASK 0x004 @@ -248,13 +240,11 @@ static const char *const error_names[8] = { static irqreturn_t tegra_mc_irq(int irq, void *data) { struct tegra_mc *mc = data; - unsigned long status, mask; + unsigned long status; unsigned int bit; /* mask all interrupts to avoid flooding */ - mask = mc_readl(mc, MC_INTMASK); - status = mc_readl(mc, MC_INTSTATUS) & mask; - + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; if (!status) return IRQ_NONE; @@ -349,7 +339,6 @@ static int tegra_mc_probe(struct platform_device *pdev) const struct of_device_id *match; struct resource *res; struct tegra_mc *mc; - u32 value; int err; match = of_match_node(tegra_mc_of_match, pdev->dev.of_node); @@ -417,11 +406,7 @@ static int tegra_mc_probe(struct platform_device *pdev) WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n"); - value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | - MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | - MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM; - - mc_writel(mc, value, MC_INTMASK); + mc_writel(mc, mc->soc->intmask, MC_INTMASK); return 0; } diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h index ddb16676c3af..24e020b4609b 100644 --- a/drivers/memory/tegra/mc.h +++ b/drivers/memory/tegra/mc.h @@ -14,6 +14,15 @@ #include +#define MC_INT_DECERR_MTS (1 << 16) +#define MC_INT_SECERR_SEC (1 << 13) +#define MC_INT_DECERR_VPR (1 << 12) +#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11) +#define MC_INT_INVALID_SMMU_PAGE (1 << 10) +#define MC_INT_ARBITRATION_EMEM (1 << 9) +#define MC_INT_SECURITY_VIOLATION (1 << 8) +#define MC_INT_DECERR_EMEM (1 << 6) + static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset) { return readl(mc->regs + offset); diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index ba8fff3d66a6..6d2a5a849d92 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -930,4 +930,6 @@ const struct tegra_mc_soc tegra114_mc_soc = { .atom_size = 32, .client_id_mask = 0x7f, .smmu = &tegra114_smmu_soc, + .intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION | + MC_INT_DECERR_EMEM, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index 21e7255e3d96..234e74f97a4b 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -1019,6 +1019,9 @@ const struct tegra_mc_soc tegra124_mc_soc = { .smmu = &tegra124_smmu_soc, .emem_regs = tegra124_mc_emem_regs, .num_emem_regs = ARRAY_SIZE(tegra124_mc_emem_regs), + .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | + MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | + MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, }; #endif /* CONFIG_ARCH_TEGRA_124_SOC */ @@ -1041,5 +1044,8 @@ const struct tegra_mc_soc tegra132_mc_soc = { .atom_size = 32, .client_id_mask = 0x7f, .smmu = &tegra132_smmu_soc, + .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | + MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | + MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, }; #endif /* CONFIG_ARCH_TEGRA_132_SOC */ diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c index 5e144abe4c18..47c78a6d8f00 100644 --- a/drivers/memory/tegra/tegra210.c +++ b/drivers/memory/tegra/tegra210.c @@ -1077,4 +1077,7 @@ const struct tegra_mc_soc tegra210_mc_soc = { .atom_size = 64, .client_id_mask = 0xff, .smmu = &tegra210_smmu_soc, + .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | + MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE | + MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index b44737840e70..d0689428ea1a 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -952,4 +952,6 @@ const struct tegra_mc_soc tegra30_mc_soc = { .atom_size = 16, .client_id_mask = 0x7f, .smmu = &tegra30_smmu_soc, + .intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION | + MC_INT_DECERR_EMEM, }; diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 44202ff897fd..f759e0918037 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -99,6 +99,8 @@ struct tegra_mc_soc { u8 client_id_mask; const struct tegra_smmu_soc *smmu; + + u32 intmask; }; struct tegra_mc { From aa9190401a553e96edb4abe32b101d3e5c83f18f Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:14:57 +0200 Subject: [PATCH 215/519] drm/gma500: fix psb_intel_lvds_mode_valid()'s return type [ Upstream commit 2ea009095c6e7396915a1d0dd480c41f02985f79 ] The method struct drm_connector_helper_funcs::mode_valid is defined as returning an 'enum drm_mode_status' but the driver implementation for this method, psb_intel_lvds_mode_valid(), uses an 'int' for it. Fix this by using 'enum drm_mode_status' for psb_intel_lvds_mode_valid(). Signed-off-by: Luc Van Oostenryck Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180424131458.2060-1-luc.vanoostenryck@gmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_intel_drv.h | 2 +- drivers/gpu/drm/gma500/psb_intel_lvds.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h index 860dd2177ca1..283570080d47 100644 --- a/drivers/gpu/drm/gma500/psb_intel_drv.h +++ b/drivers/gpu/drm/gma500/psb_intel_drv.h @@ -252,7 +252,7 @@ extern int intelfb_remove(struct drm_device *dev, extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); -extern int psb_intel_lvds_mode_valid(struct drm_connector *connector, +extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode); extern int psb_intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index 61e3a097a478..ccd1b8bf0fd5 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -343,7 +343,7 @@ static void psb_intel_lvds_restore(struct drm_connector *connector) } } -int psb_intel_lvds_mode_valid(struct drm_connector *connector, +enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_psb_private *dev_priv = connector->dev->dev_private; From 0eb135eea738adae5bbe3a93b806576e20cd66f6 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 24 Apr 2018 03:56:37 +0100 Subject: [PATCH 216/519] ipconfig: Correctly initialise ic_nameservers [ Upstream commit 300eec7c0a2495f771709c7642aa15f7cc148b83 ] ic_nameservers, which stores the list of name servers discovered by ipconfig, is initialised (i.e. has all of its elements set to NONE, or 0xffffffff) by ic_nameservers_predef() in the following scenarios: - before the "ip=" and "nfsaddrs=" kernel command line parameters are parsed (in ip_auto_config_setup()); - before autoconfiguring via DHCP or BOOTP (in ic_bootp_init()), in order to clear any values that may have been set after parsing "ip=" or "nfsaddrs=" and are no longer needed. This means that ic_nameservers_predef() is not called when neither "ip=" nor "nfsaddrs=" is specified on the kernel command line. In this scenario, every element in ic_nameservers remains set to 0x00000000, which is indistinguishable from ANY and causes pnp_seq_show() to write the following (bogus) information to /proc/net/pnp: #MANUAL nameserver 0.0.0.0 nameserver 0.0.0.0 nameserver 0.0.0.0 This is potentially problematic for systems that blindly link /etc/resolv.conf to /proc/net/pnp. Ensure that ic_nameservers is also initialised when neither "ip=" nor "nfsaddrs=" are specified by calling ic_nameservers_predef() in ip_auto_config(), but only when ip_auto_config_setup() was not called earlier. This causes the following to be written to /proc/net/pnp, and is consistent with what gets written when ipconfig is configured manually but no name servers are specified on the kernel command line: #MANUAL Signed-off-by: Chris Novakovic Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipconfig.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 9d6b9c4c5f82..60f564db25a3 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -790,6 +790,11 @@ static void __init ic_bootp_init_ext(u8 *e) */ static inline void __init ic_bootp_init(void) { + /* Re-initialise all name servers to NONE, in case any were set via the + * "ip=" or "nfsaddrs=" kernel command line parameters: any IP addresses + * specified there will already have been decoded but are no longer + * needed + */ ic_nameservers_predef(); dev_add_pack(&bootp_packet_type); @@ -1423,6 +1428,13 @@ static int __init ip_auto_config(void) int err; unsigned int i; + /* Initialise all name servers to NONE (but only if the "ip=" or + * "nfsaddrs=" kernel command line parameters weren't decoded, otherwise + * we'll overwrite the IP addresses specified there) + */ + if (ic_set_manually == 0) + ic_nameservers_predef(); + #ifdef CONFIG_PROC_FS proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ @@ -1640,6 +1652,7 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + /* Initialise all name servers to NONE */ ic_nameservers_predef(); /* Parse string for static IP assignment. */ From 7315e0d38a29792cbb32b84e17e413412171e1e1 Mon Sep 17 00:00:00 2001 From: Siva Rebbagondla Date: Wed, 11 Apr 2018 12:13:32 +0530 Subject: [PATCH 217/519] rsi: Fix 'invalid vdd' warning in mmc [ Upstream commit 78e450719c702784e42af6da912d3692fd3da0cb ] While performing cleanup, driver is messing with card->ocr value by not masking rocr against ocr_avail. Below panic is observed with some of the SDIO host controllers due to this. Issue is resolved by reverting incorrect modifications to vdd. [ 927.423821] mmc1: Invalid vdd 0x1f [ 927.423925] Modules linked in: rsi_sdio(+) cmac bnep arc4 rsi_91x mac80211 cfg80211 btrsi rfcomm bluetooth ecdh_generic [ 927.424073] CPU: 0 PID: 1624 Comm: insmod Tainted: G W 4.15.0-1000-caracalla #1 [ 927.424075] Hardware name: Dell Inc. Edge Gateway 3003/ , BIOS 01.00.06 01/22/2018 [ 927.424082] RIP: 0010:sdhci_set_power_noreg+0xdd/0x190[sdhci] [ 927.424085] RSP: 0018:ffffac3fc064b930 EFLAGS: 00010282 [ 927.424107] Call Trace: [ 927.424118] sdhci_set_power+0x5a/0x60 [sdhci] [ 927.424125] sdhci_set_ios+0x360/0x3b0 [sdhci] [ 927.424133] mmc_set_initial_state+0x92/0x120 [ 927.424137] mmc_power_up.part.34+0x33/0x1d0 [ 927.424141] mmc_power_up+0x17/0x20 [ 927.424147] mmc_sdio_runtime_resume+0x2d/0x50 [ 927.424151] mmc_runtime_resume+0x17/0x20 [ 927.424156] __rpm_callback+0xc4/0x200 [ 927.424161] ? idr_alloc_cyclic+0x57/0xd0 [ 927.424165] ? mmc_runtime_suspend+0x20/0x20 [ 927.424169] rpm_callback+0x24/0x80 [ 927.424172] ? mmc_runtime_suspend+0x20/0x20 [ 927.424176] rpm_resume+0x4b3/0x6c0 [ 927.424181] __pm_runtime_resume+0x4e/0x80 [ 927.424188] driver_probe_device+0x41/0x490 [ 927.424192] __driver_attach+0xdf/0xf0 [ 927.424196] ? driver_probe_device+0x490/0x490 [ 927.424201] bus_for_each_dev+0x6c/0xc0 [ 927.424205] driver_attach+0x1e/0x20 [ 927.424209] bus_add_driver+0x1f4/0x270 [ 927.424217] ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio] [ 927.424221] driver_register+0x60/0xe0 [ 927.424227] ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio] [ 927.424231] sdio_register_driver+0x20/0x30 [ 927.424237] rsi_module_init+0x16/0x40 [rsi_sdio] Signed-off-by: Siva Rebbagondla Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 8428858204a6..fc895b466ebb 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -155,7 +155,6 @@ static void rsi_reset_card(struct sdio_func *pfunction) int err; struct mmc_card *card = pfunction->card; struct mmc_host *host = card->host; - s32 bit = (fls(host->ocr_avail) - 1); u8 cmd52_resp; u32 clock, resp, i; u16 rca; @@ -175,7 +174,6 @@ static void rsi_reset_card(struct sdio_func *pfunction) msleep(20); /* Initialize the SDIO card */ - host->ios.vdd = bit; host->ios.chip_select = MMC_CS_DONTCARE; host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; host->ios.power_mode = MMC_POWER_UP; From 2dde48178ba5e66a2ac7235eaebad5655ae6db3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= Date: Mon, 9 Apr 2018 10:00:06 +0200 Subject: [PATCH 218/519] audit: allow not equal op for audit by executable [ Upstream commit 23bcc480dac204c7dbdf49d96b2c918ed98223c2 ] Current implementation of auditing by executable name only implements the 'equal' operator. This patch extends it to also support the 'not equal' operator. See: https://github.com/linux-audit/audit-kernel/issues/53 Signed-off-by: Ondrej Mosnacek Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/auditfilter.c | 2 +- kernel/auditsc.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b8ff9e193753..b57f929f1b46 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -406,7 +406,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) return -EINVAL; break; case AUDIT_EXE: - if (f->op != Audit_equal) + if (f->op != Audit_not_equal && f->op != Audit_equal) return -EINVAL; if (entry->rule.listnr != AUDIT_FILTER_EXIT) return -EINVAL; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7444f95f3ee9..0fe8b337291a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -470,6 +470,8 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_EXE: result = audit_exe_compare(tsk, rule->exe); + if (f->op == Audit_not_equal) + result = !result; break; case AUDIT_UID: result = audit_uid_comparator(cred->uid, f->op, f->uid); From e9996e124ea742fb54b60b59329858bdc2918c9a Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 10 Apr 2018 15:05:42 +0200 Subject: [PATCH 219/519] microblaze: Fix simpleImage format generation [ Upstream commit ece97f3a5fb50cf5f98886fbc63c9665f2bb199d ] simpleImage generation was broken for some time. This patch is fixing steps how simpleImage.*.ub file is generated. Steps are objdump of vmlinux and create .ub. Also make sure that there is striped elf version with .strip suffix. Signed-off-by: Michal Simek Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/microblaze/boot/Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index 91d2068da1b9..0f3fe6a151dc 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -21,17 +21,19 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE quiet_cmd_cp = CP $< $@$2 cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) -quiet_cmd_strip = STRIP $@ +quiet_cmd_strip = STRIP $< $@$2 cmd_strip = $(STRIP) -K microblaze_start -K _end -K __log_buf \ - -K _fdt_start vmlinux -o $@ + -K _fdt_start $< -o $@$2 UIMAGE_LOADADDR = $(CONFIG_KERNEL_BASE_ADDR) +UIMAGE_IN = $@ +UIMAGE_OUT = $@.ub $(obj)/simpleImage.%: vmlinux FORCE $(call if_changed,cp,.unstrip) $(call if_changed,objcopy) $(call if_changed,uimage) - $(call if_changed,strip) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + $(call if_changed,strip,.strip) + @echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')' clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb From 5f66271532ce7f00bc98aeaa945ca5cd22abb742 Mon Sep 17 00:00:00 2001 From: Dominik Bozek Date: Fri, 13 Apr 2018 10:42:31 -0700 Subject: [PATCH 220/519] usb: hub: Don't wait for connect state at resume for powered-off ports [ Upstream commit 5d111f5190848d6fb1c414dc57797efea3526a2f ] wait_for_connected() wait till a port change status to USB_PORT_STAT_CONNECTION, but this is not possible if the port is unpowered. The loop will only exit at timeout. Such case take place if an over-current incident happen while system is in S3. Then during resume wait_for_connected() will wait 2s, which may be noticeable by the user. Signed-off-by: Dominik Bozek Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 93756664592a..2facffea2ee0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3308,6 +3308,10 @@ static int wait_for_ss_port_enable(struct usb_device *udev, while (delay_ms < 2000) { if (status || *portstatus & USB_PORT_STAT_CONNECTION) break; + if (!port_is_power_on(hub, *portstatus)) { + status = -ENODEV; + break; + } msleep(20); delay_ms += 20; status = hub_port_status(hub, *port1, portstatus, portchange); From d4ceb46ad29110a9065f8b35c41f780d7a98891a Mon Sep 17 00:00:00 2001 From: Tudor-Dan Ambarus Date: Tue, 3 Apr 2018 09:39:01 +0300 Subject: [PATCH 221/519] crypto: authencesn - don't leak pointers to authenc keys [ Upstream commit 31545df391d58a3bb60e29b1192644a6f2b5a8dd ] In crypto_authenc_esn_setkey we save pointers to the authenc keys in a local variable of type struct crypto_authenc_keys and we don't zeroize it after use. Fix this and don't leak pointers to the authenc keys. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/authencesn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 52154ef21b5e..fa0c4567f697 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -90,6 +90,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 * CRYPTO_TFM_RES_MASK); out: + memzero_explicit(&keys, sizeof(keys)); return err; badkey: From b573c815efbc285cc0828d871184ed935e92ec07 Mon Sep 17 00:00:00 2001 From: Tudor-Dan Ambarus Date: Tue, 3 Apr 2018 09:39:00 +0300 Subject: [PATCH 222/519] crypto: authenc - don't leak pointers to authenc keys [ Upstream commit ad2fdcdf75d169e7a5aec6c7cb421c0bec8ec711 ] In crypto_authenc_setkey we save pointers to the authenc keys in a local variable of type struct crypto_authenc_keys and we don't zeroize it after use. Fix this and don't leak pointers to the authenc keys. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/authenc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/authenc.c b/crypto/authenc.c index 55a354d57251..b7290c5b1eaa 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -108,6 +108,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, CRYPTO_TFM_RES_MASK); out: + memzero_explicit(&keys, sizeof(keys)); return err; badkey: From ced0f4f3a5815ea2ed6de6ec1783871a44cd6295 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 14 Mar 2018 11:41:36 -0400 Subject: [PATCH 223/519] media: omap3isp: fix unbalanced dma_iommu_mapping [ Upstream commit b7e1e6859fbf60519fd82d7120cee106a6019512 ] The OMAP3 ISP driver manages its MMU mappings through the IOMMU-aware ARM DMA backend. The current code creates a dma_iommu_mapping and attaches this to the ISP device, but never detaches the mapping in either the probe failure paths or the driver remove path resulting in an unbalanced mapping refcount and a memory leak. Fix this properly. Reported-by: Pavel Machek Signed-off-by: Suman Anna Tested-by: Pavel Machek Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/omap3isp/isp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 56e683b19a73..91e02c1ff392 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -2077,6 +2077,7 @@ static int isp_initialize_modules(struct isp_device *isp) static void isp_detach_iommu(struct isp_device *isp) { + arm_iommu_detach_device(isp->dev); arm_iommu_release_mapping(isp->mapping); isp->mapping = NULL; iommu_group_remove_device(isp->dev); @@ -2110,8 +2111,7 @@ static int isp_attach_iommu(struct isp_device *isp) mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(isp->dev, "failed to create ARM IOMMU mapping\n"); - ret = PTR_ERR(mapping); - goto error; + return PTR_ERR(mapping); } isp->mapping = mapping; @@ -2126,7 +2126,8 @@ static int isp_attach_iommu(struct isp_device *isp) return 0; error: - isp_detach_iommu(isp); + arm_iommu_release_mapping(isp->mapping); + isp->mapping = NULL; return ret; } From 87812af9bbe9d10c69478c749073a048b7f86672 Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 7 Apr 2018 00:47:23 +0200 Subject: [PATCH 224/519] scsi: scsi_dh: replace too broad "TP9" string with the exact models [ Upstream commit 37b37d2609cb0ac267280ef27350b962d16d272e ] SGI/TP9100 is not an RDAC array: ^^^ https://git.opensvc.com/gitweb.cgi?p=multipath-tools/.git;a=blob;f=libmultipath/hwtable.c;h=88b4700beb1d8940008020fbe4c3cd97d62f4a56;hb=HEAD#l235 This partially reverts commit 35204772ea03 ("[SCSI] scsi_dh_rdac : Consolidate rdac strings together") [mkp: fixed up the new entries to align with rest of struct] Cc: NetApp RDAC team Cc: Hannes Reinecke Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: DM ML Signed-off-by: Xose Vazquez Perez Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_dh.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 5711d58f9e81..a8ebaeace154 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -58,7 +58,10 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"IBM", "3526", "rdac", }, {"IBM", "3542", "rdac", }, {"IBM", "3552", "rdac", }, - {"SGI", "TP9", "rdac", }, + {"SGI", "TP9300", "rdac", }, + {"SGI", "TP9400", "rdac", }, + {"SGI", "TP9500", "rdac", }, + {"SGI", "TP9700", "rdac", }, {"SGI", "IS", "rdac", }, {"STK", "OPENstorage", "rdac", }, {"STK", "FLEXLINE 380", "rdac", }, From dd476b5e5b4a42130a8a4c991ba31eeceffe6057 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 6 Apr 2018 02:02:11 -0700 Subject: [PATCH 225/519] scsi: megaraid_sas: Increase timeout by 1 sec for non-RAID fastpath IOs [ Upstream commit 3239b8cd28fd849a2023483257d35d68c5876c74 ] Hardware could time out Fastpath IOs one second earlier than the timeout provided by the host. For non-RAID devices, driver provides timeout value based on OS provided timeout value. Under certain scenarios, if the OS provides a timeout value of 1 second, due to above behavior hardware will timeout immediately. Increase timeout value for non-RAID fastpath IOs by 1 second. Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 96007633ad39..213944ed64d9 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1886,6 +1886,9 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, pRAID_Context->timeoutValue = cpu_to_le16(os_timeout_value); pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id); } else { + if (os_timeout_value) + os_timeout_value++; + /* system pd Fast Path */ io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; timeout_limit = (scmd->device->type == TYPE_DISK) ? From 3ff01af80d67326702a82aa80893c887a82501a2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Apr 2018 07:54:51 -0400 Subject: [PATCH 226/519] media: si470x: fix __be16 annotations [ Upstream commit 90db5c829692a0a7845e977e45719b4699216bd4 ] The annotations there are wrong as warned: drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: warning: incorrect type in assignment (different base types) drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: expected unsigned short [unsigned] [short] drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: got restricted __be16 [usertype] drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/radio/si470x/radio-si470x-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index 471d6a8ae8a4..9326439bc49c 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -96,7 +96,7 @@ MODULE_PARM_DESC(max_rds_errors, "RDS maximum block errors: *1*"); */ int si470x_get_register(struct si470x_device *radio, int regnr) { - u16 buf[READ_REG_NUM]; + __be16 buf[READ_REG_NUM]; struct i2c_msg msgs[1] = { { .addr = radio->client->addr, @@ -121,7 +121,7 @@ int si470x_get_register(struct si470x_device *radio, int regnr) int si470x_set_register(struct si470x_device *radio, int regnr) { int i; - u16 buf[WRITE_REG_NUM]; + __be16 buf[WRITE_REG_NUM]; struct i2c_msg msgs[1] = { { .addr = radio->client->addr, @@ -151,7 +151,7 @@ int si470x_set_register(struct si470x_device *radio, int regnr) static int si470x_get_all_registers(struct si470x_device *radio) { int i; - u16 buf[READ_REG_NUM]; + __be16 buf[READ_REG_NUM]; struct i2c_msg msgs[1] = { { .addr = radio->client->addr, From 5a3d1d67b3548e9dc1572c87527fa35b309feb0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 28 Mar 2018 15:30:37 -0700 Subject: [PATCH 227/519] drm: Add DP PSR2 sink enable bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4f212e40468650e220c1770876c7f25b8e0c1ff5 ] To comply with eDP1.4a this bit should be set when enabling PSR2. Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180328223046.16125-1-jose.souza@intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index bb9d0deca07c..0fb4975fae91 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -342,6 +342,7 @@ # define DP_PSR_FRAME_CAPTURE (1 << 3) # define DP_PSR_SELECTIVE_UPDATE (1 << 4) # define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS (1 << 5) +# define DP_PSR_ENABLE_PSR2 (1 << 6) /* eDP 1.4a */ #define DP_ADAPTER_CTRL 0x1a0 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE (1 << 0) From 1ed4ccaf052ec2fe00043759a284921e468687c3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 14 Jul 2018 23:55:57 -0400 Subject: [PATCH 228/519] random: mix rdrand with entropy sent in from userspace commit 81e69df38e2911b642ec121dec319fad2a4782f3 upstream. Fedora has integrated the jitter entropy daemon to work around slow boot problems, especially on VM's that don't support virtio-rng: https://bugzilla.redhat.com/show_bug.cgi?id=1572944 It's understandable why they did this, but the Jitter entropy daemon works fundamentally on the principle: "the CPU microarchitecture is **so** complicated and we can't figure it out, so it *must* be random". Yes, it uses statistical tests to "prove" it is secure, but AES_ENCRYPT(NSA_KEY, COUNTER++) will also pass statistical tests with flying colors. So if RDRAND is available, mix it into entropy submitted from userspace. It can't hurt, and if you believe the NSA has backdoored RDRAND, then they probably have enough details about the Intel microarchitecture that they can reverse engineer how the Jitter entropy daemon affects the microarchitecture, and attack its output stream. And if RDRAND is in fact an honest DRNG, it will immeasurably improve on what the Jitter entropy daemon might produce. This also provides some protection against someone who is able to read or set the entropy seed file. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index dffd06a3bb76..2916d08ee30e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1503,14 +1503,22 @@ static int write_pool(struct entropy_store *r, const char __user *buffer, size_t count) { size_t bytes; - __u32 buf[16]; + __u32 t, buf[16]; const char __user *p = buffer; while (count > 0) { + int b, i = 0; + bytes = min(count, sizeof(buf)); if (copy_from_user(&buf, p, bytes)) return -EFAULT; + for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) { + if (!arch_get_random_int(&t)) + break; + buf[i] ^= t; + } + count -= bytes; p += bytes; From d0f02f70b31306f98619aa7532613008507deda5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Jul 2018 12:44:46 -0700 Subject: [PATCH 229/519] squashfs: be more careful about metadata corruption commit 01cfb7937a9af2abb1136c7e89fbf3fd92952956 upstream. Anatoly Trosinenko reports that a corrupted squashfs image can cause a kernel oops. It turns out that squashfs can end up being confused about negative fragment lengths. The regular squashfs_read_data() does check for negative lengths, but squashfs_read_metadata() did not, and the fragment size code just blindly trusted the on-disk value. Fix both the fragment parsing and the metadata reading code. Reported-by: Anatoly Trosinenko Cc: Al Viro Cc: Phillip Lougher Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/cache.c | 3 +++ fs/squashfs/file.c | 8 ++++++-- fs/squashfs/fragment.c | 4 +--- fs/squashfs/squashfs_fs.h | 6 ++++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1cb70a0b2168..91ce49c05b7c 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); + if (unlikely(length < 0)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index e5c9689062ba..1ec7bae2751d 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n, } for (i = 0; i < blocks; i++) { - int size = le32_to_cpu(blist[i]); + int size = squashfs_block_size(blist[i]); + if (size < 0) { + err = size; + goto failure; + } block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); } n -= blocks; @@ -367,7 +371,7 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) sizeof(size)); if (res < 0) return res; - return le32_to_cpu(size); + return squashfs_block_size(size); } /* Copy data into page cache */ diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 0ed6edbc5c71..86ad9a4b8c36 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -61,9 +61,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, return size; *fragment_block = le64_to_cpu(fragment_entry.start_block); - size = le32_to_cpu(fragment_entry.size); - - return size; + return squashfs_block_size(fragment_entry.size); } diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 506f4ba5b983..e66486366f02 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -129,6 +129,12 @@ #define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) +static inline int squashfs_block_size(__le32 raw) +{ + u32 size = le32_to_cpu(raw); + return (size >> 25) ? -EIO : size; +} + /* * Inode number ops. Inodes consist of a compressed block number, and an * uncompressed offset within that block From 5afdb4536020da5625872ea0a642fbeaf8869ada Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 10 Jul 2018 01:07:43 -0400 Subject: [PATCH 230/519] ext4: fix inline data updates with checksums enabled commit 362eca70b53389bddf3143fe20f53dcce2cfdf61 upstream. The inline data code was updating the raw inode directly; this is problematic since if metadata checksums are enabled, ext4_mark_inode_dirty() must be called to update the inode's checksum. In addition, the jbd2 layer requires that get_write_access() be called before the metadata buffer is modified. Fix both of these problems. https://bugzilla.kernel.org/show_bug.cgi?id=200443 Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 19 +++++++++++-------- fs/ext4/inode.c | 16 +++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e72f53a89764..c449bc089c94 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -678,6 +678,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, goto convert; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out; + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, 0, flags); @@ -706,7 +710,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, out_up_read: up_read(&EXT4_I(inode)->xattr_sem); out: - if (handle) + if (handle && (ret != 1)) ext4_journal_stop(handle); brelse(iloc.bh); return ret; @@ -748,6 +752,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); + mark_inode_dirty(inode); out: return copied; } @@ -894,7 +899,6 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, goto out; } - page = grab_cache_page_write_begin(mapping, 0, flags); if (!page) { ret = -ENOMEM; @@ -912,6 +916,9 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, if (ret < 0) goto out_release_page; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out_release_page; up_read(&EXT4_I(inode)->xattr_sem); *pagep = page; @@ -932,7 +939,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int i_size_changed = 0; int ret; ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -950,10 +956,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * But it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. */ - if (pos+copied > inode->i_size) { + if (pos+copied > inode->i_size) i_size_write(inode, pos+copied); - i_size_changed = 1; - } unlock_page(page); page_cache_release(page); @@ -963,8 +967,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) - mark_inode_dirty(inode); + mark_inode_dirty(inode); return copied; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b507de0e4bbf..181db3c7f5d1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1164,9 +1164,10 @@ static int ext4_write_end(struct file *file, loff_t old_size = inode->i_size; int ret = 0, ret2; int i_size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_write_end(inode, pos, len, copied); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1194,7 +1195,7 @@ static int ext4_write_end(struct file *file, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) + if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && ext4_can_truncate(inode)) @@ -1268,6 +1269,7 @@ static int ext4_journalled_write_end(struct file *file, int partial = 0; unsigned from, to; int size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); @@ -1275,7 +1277,7 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1306,7 +1308,7 @@ static int ext4_journalled_write_end(struct file *file, if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); - if (size_changed) { + if (size_changed || inline_data) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -1804,11 +1806,7 @@ static int __ext4_journalled_writepage(struct page *page, } if (inline_data) { - BUFFER_TRACE(inode_bh, "get write access"); - ret = ext4_journal_get_write_access(handle, inode_bh); - - err = ext4_handle_dirty_metadata(handle, inode, inode_bh); - + ret = ext4_mark_inode_dirty(handle, inode); } else { ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); From a66e985716add0a2bae8b94e8f18ecaf59bd44eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 12 Jul 2018 19:08:05 -0400 Subject: [PATCH 231/519] ext4: check for allocation block validity with block group locked commit 8d5a803c6a6ce4ec258e31f76059ea5153ba46ef upstream. With commit 044e6e3d74a3: "ext4: don't update checksum of new initialized bitmaps" the buffer valid bit will get set without actually setting up the checksum for the allocation bitmap, since the checksum will get calculated once we actually allocate an inode or block. If we are doing this, then we need to (re-)check the verified bit after we take the block group lock. Otherwise, we could race with another process reading and verifying the bitmap, which would then complain about the checksum being invalid. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1780137 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/balloc.c | 3 +++ fs/ext4/ialloc.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 092da164bdc0..e0fb7cdcee89 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -378,6 +378,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, desc, bh))) { ext4_unlock_group(sb, block_group); @@ -400,6 +402,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 48d818eba9c3..041117fd8fd7 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -88,6 +88,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; blk = ext4_inode_bitmap(sb, desc); if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, EXT4_INODES_PER_GROUP(sb) / 8)) { @@ -105,6 +107,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSBADCRC; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } From b4cb2f0fd456f4eca58c9a84639aa8219c319cda Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Mon, 25 Apr 2016 10:47:56 +0100 Subject: [PATCH 232/519] dmaengine: pxa_dma: remove duplicate const qualifier commit 4e0def887d717598ae8062b46e55f9e00d3a5783 upstream. Signed-off-by: Eric Engestrom Acked-by: Robert Jarzmik Signed-off-by: Vinod Koul Cc: Andrea Adami Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pxa_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 55f5d33f6dc7..4251e9ac0373 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -1321,7 +1321,7 @@ static int pxad_init_phys(struct platform_device *op, return 0; } -static const struct of_device_id const pxad_dt_ids[] = { +static const struct of_device_id pxad_dt_ids[] = { { .compatible = "marvell,pdma-1.0", }, {} }; From bb0376b6b6705503bd9b5706816213d260e1bde9 Mon Sep 17 00:00:00 2001 From: Andrea Adami Date: Fri, 6 May 2016 17:27:34 +0200 Subject: [PATCH 233/519] ASoC: pxa: Fix module autoload for platform drivers commit e5b7d71aa5b32180adec49a17c752e577c68f740 upstream. These platform drivers are lacking MODULE_ALIAS so module autoloading doesn't work. Tested on corgi and poodle with kernel 4.4. Signed-off-by: Andrea Adami Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/pxa/brownstone.c | 1 + sound/soc/pxa/mioa701_wm9713.c | 1 + sound/soc/pxa/mmp-pcm.c | 1 + sound/soc/pxa/mmp-sspa.c | 1 + sound/soc/pxa/palm27x.c | 1 + sound/soc/pxa/pxa-ssp.c | 1 + sound/soc/pxa/pxa2xx-ac97.c | 1 + sound/soc/pxa/pxa2xx-pcm.c | 1 + 8 files changed, 8 insertions(+) diff --git a/sound/soc/pxa/brownstone.c b/sound/soc/pxa/brownstone.c index 6147e86e9b0f..55ca9c9364b8 100644 --- a/sound/soc/pxa/brownstone.c +++ b/sound/soc/pxa/brownstone.c @@ -136,3 +136,4 @@ module_platform_driver(mmp_driver); MODULE_AUTHOR("Leo Yan "); MODULE_DESCRIPTION("ALSA SoC Brownstone"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:brownstone-audio"); diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c index 29bc60e85e92..6cd28f95d548 100644 --- a/sound/soc/pxa/mioa701_wm9713.c +++ b/sound/soc/pxa/mioa701_wm9713.c @@ -203,3 +203,4 @@ module_platform_driver(mioa701_wm9713_driver); MODULE_AUTHOR("Robert Jarzmik (rjarzmik@free.fr)"); MODULE_DESCRIPTION("ALSA SoC WM9713 MIO A701"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:mioa701-wm9713"); diff --git a/sound/soc/pxa/mmp-pcm.c b/sound/soc/pxa/mmp-pcm.c index 51e790d006f5..96df9b2d8fc4 100644 --- a/sound/soc/pxa/mmp-pcm.c +++ b/sound/soc/pxa/mmp-pcm.c @@ -248,3 +248,4 @@ module_platform_driver(mmp_pcm_driver); MODULE_AUTHOR("Leo Yan "); MODULE_DESCRIPTION("MMP Soc Audio DMA module"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:mmp-pcm-audio"); diff --git a/sound/soc/pxa/mmp-sspa.c b/sound/soc/pxa/mmp-sspa.c index eca60c29791a..ca8b23f8c525 100644 --- a/sound/soc/pxa/mmp-sspa.c +++ b/sound/soc/pxa/mmp-sspa.c @@ -482,3 +482,4 @@ module_platform_driver(asoc_mmp_sspa_driver); MODULE_AUTHOR("Leo Yan "); MODULE_DESCRIPTION("MMP SSPA SoC Interface"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:mmp-sspa-dai"); diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c index 4e74d9573f03..bcc81e920a67 100644 --- a/sound/soc/pxa/palm27x.c +++ b/sound/soc/pxa/palm27x.c @@ -161,3 +161,4 @@ module_platform_driver(palm27x_wm9712_driver); MODULE_AUTHOR("Marek Vasut "); MODULE_DESCRIPTION("ALSA SoC Palm T|X, T5 and LifeDrive"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:palm27x-asoc"); diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index da03fad1b9cd..3cad990dad2c 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -833,3 +833,4 @@ module_platform_driver(asoc_ssp_driver); MODULE_AUTHOR("Mark Brown "); MODULE_DESCRIPTION("PXA SSP/PCM SoC Interface"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pxa-ssp-dai"); diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index f3de615aacd7..9615e6de1306 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -287,3 +287,4 @@ module_platform_driver(pxa2xx_ac97_driver); MODULE_AUTHOR("Nicolas Pitre"); MODULE_DESCRIPTION("AC97 driver for the Intel PXA2xx chip"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pxa2xx-ac97"); diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c index 9f390398d518..410d48b93031 100644 --- a/sound/soc/pxa/pxa2xx-pcm.c +++ b/sound/soc/pxa/pxa2xx-pcm.c @@ -117,3 +117,4 @@ module_platform_driver(pxa_pcm_driver); MODULE_AUTHOR("Nicolas Pitre"); MODULE_DESCRIPTION("Intel PXA2xx PCM DMA module"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pxa-pcm-audio"); From 33fbeee10538755e4955a18da0dde690be02073b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 27 Jul 2018 18:15:46 +0200 Subject: [PATCH 234/519] ipv4: remove BUG_ON() from fib_compute_spec_dst [ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ] Remove BUG_ON() from fib_compute_spec_dst routine and check in_dev pointer during flowi4 data structure initialization. fib_compute_spec_dst routine can be run concurrently with device removal where ip_ptr net_device pointer is set to NULL. This can happen if userspace enables pkt info on UDP rx socket and the device is removed while traffic is flowing Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8f05816a8be2..015c33712803 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -289,19 +289,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return ip_hdr(skb)->daddr; in_dev = __in_dev_get_rcu(dev); - BUG_ON(!in_dev); net = dev_net(dev); scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { + bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, - .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); From 3c3deb06ebda981b65be2abfd5980e23b1519dff Mon Sep 17 00:00:00 2001 From: tangpengpeng Date: Thu, 26 Jul 2018 14:45:16 +0800 Subject: [PATCH 235/519] net: fix amd-xgbe flow-control issue [ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ] If we enable or disable xgbe flow-control by ethtool , it does't work.Because the parameter is not properly assigned,so we need to adjust the assignment order of the parameters. Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic") Signed-off-by: tangpengpeng Acked-by: Tom Lendacky Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 446058081866..7a0ab4c44ee4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -872,14 +872,14 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) if (pdata->tx_pause != pdata->phy.tx_pause) { new_state = 1; - pdata->hw_if.config_tx_flow_control(pdata); pdata->tx_pause = pdata->phy.tx_pause; + pdata->hw_if.config_tx_flow_control(pdata); } if (pdata->rx_pause != pdata->phy.rx_pause) { new_state = 1; - pdata->hw_if.config_rx_flow_control(pdata); pdata->rx_pause = pdata->phy.rx_pause; + pdata->hw_if.config_rx_flow_control(pdata); } /* Speed support */ From d1fc12d8475ce4538b03f4901d6a65b639700e27 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 28 Jul 2018 09:52:10 +0200 Subject: [PATCH 236/519] net: lan78xx: fix rx handling before first packet is send [ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ] As long the bh tasklet isn't scheduled once, no packet from the rx path will be handled. Since the tx path also schedule the same tasklet this situation only persits until the first packet transmission. So fix this issue by scheduling the tasklet after link reset. Link: https://github.com/raspberrypi/linux/issues/2617 Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet") Suggested-by: Floris Bos Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a6d429950cb0..acec4b565511 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1361,6 +1361,8 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) netif_dbg(dev, ifup, dev->net, "MAC address set to random addr"); } + + tasklet_schedule(&dev->bh); } ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); From f6384b2517429e1666a79862fef895de3e2a2cd4 Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Fri, 27 Jul 2018 17:56:08 +0800 Subject: [PATCH 237/519] xen-netfront: wait xenbus state change when load module manually [ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ] When loading module manually, after call xenbus_switch_state to initializes the state of the netfront device, the driver state did not change so fast that may lead no dev created in latest kernel. This patch adds wait to make sure xenbus knows the driver is not in closed/unknown state. Current state: [vm]# ethtool eth0 Settings for eth0: Link detected: yes [vm]# modprobe -r xen_netfront [vm]# modprobe xen_netfront [vm]# ethtool eth0 Settings for eth0: Cannot get device settings: No such device Cannot get wake-on-lan settings: No such device Cannot get message level: No such device Cannot get link status: No such device No data available With the patch installed. [vm]# ethtool eth0 Settings for eth0: Link detected: yes [vm]# modprobe -r xen_netfront [vm]# modprobe xen_netfront [vm]# ethtool eth0 Settings for eth0: Link detected: yes Signed-off-by: Xiao Liang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 3bb3d6d9117c..bec9f099573b 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -86,6 +86,7 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_load_q); static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); struct netfront_stats { @@ -1335,6 +1336,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); xenbus_switch_state(dev, XenbusStateInitialising); + wait_event(module_load_q, + xenbus_read_driver_state(dev->otherend) != + XenbusStateClosed && + xenbus_read_driver_state(dev->otherend) != + XenbusStateUnknown); return netdev; exit: From 6f4a86ce5d04d57b17b7b987eb27038bf9c3a1a5 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 15:05:37 +0300 Subject: [PATCH 238/519] NET: stmmac: align DMA stuff to largest cache line length [ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ] As for today STMMAC_ALIGN macro (which is used to align DMA stuff) relies on L1 line length (L1_CACHE_BYTES). This isn't correct in case of system with several cache levels which might have L1 cache line length smaller than L2 line. This can lead to sharing one cache line between DMA buffer and other data, so we can lose this data while invalidate DMA buffer before DMA transaction. Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for aligning. Signed-off-by: Eugeniy Paltsev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5adaf537513b..7bba30f24135 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -54,7 +54,7 @@ #include #include -#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) +#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES) /* Module parameters */ #define TX_TIMEO 5000 From e2f337e2bd4efe32051a496a7fcdd94ea67c0cfa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 May 2018 14:47:25 -0700 Subject: [PATCH 239/519] tcp: do not force quickack when receiving out-of-order packets [ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ] As explained in commit 9f9843a751d0 ("tcp: properly handle stretch acks in slow start"), TCP stacks have to consider how many packets are acknowledged in one single ACK, because of GRO, but also because of ACK compression or losses. We plan to add SACK compression in the following patch, we must therefore not call tcp_enter_quickack_mode() Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5c645069a09a..2315701036fa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4649,8 +4649,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) goto out_of_window; - tcp_enter_quickack_mode(sk); - if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", From 2b30c04bc6f9e7be2d9a5e1b504faa904154c7da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 May 2018 15:08:56 -0700 Subject: [PATCH 240/519] tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode [ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ] We want to add finer control of the number of ACK packets sent after ECN events. This patch is not changing current behavior, it only enables following change. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/ipv4/tcp_dctcp.c | 4 ++-- net/ipv4/tcp_input.c | 24 +++++++++++++----------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 65babd8a682d..cac4a6ad5db3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -376,7 +376,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -void tcp_enter_quickack_mode(struct sock *sk); +void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index e63b764e55ea..6300edf90e60 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) __tcp_send_ack(sk, ca->prior_rcv_nxt); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, 1); } ca->prior_rcv_nxt = tp->rcv_nxt; @@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) __tcp_send_ack(sk, ca->prior_rcv_nxt); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, 1); } ca->prior_rcv_nxt = tp->rcv_nxt; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2315701036fa..5f0ef0b7ef37 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -176,21 +176,23 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) } } -static void tcp_incr_quickack(struct sock *sk) +static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks == 0) quickacks = 2; + quickacks = min(quickacks, max_quickacks); if (quickacks > icsk->icsk_ack.quick) - icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); + icsk->icsk_ack.quick = quickacks; } -void tcp_enter_quickack_mode(struct sock *sk) +void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); - tcp_incr_quickack(sk); + + tcp_incr_quickack(sk, max_quickacks); icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } @@ -235,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode((struct sock *)tp); + tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); break; case INET_ECN_CE: if (tcp_ca_needs_ecn((struct sock *)tp)) @@ -243,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode((struct sock *)tp); + tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; @@ -651,7 +653,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) /* The _first_ data packet received, initialize * delayed ACK engine. */ - tcp_incr_quickack(sk); + tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); icsk->icsk_ack.ato = TCP_ATO_MIN; } else { int m = now - icsk->icsk_ack.lrcvtime; @@ -667,7 +669,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) /* Too long gap. Apparently sender failed to * restart window, so that we send ACKs quickly. */ - tcp_incr_quickack(sk); + tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); sk_mem_reclaim(sk); } } @@ -4136,7 +4138,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -4638,7 +4640,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_schedule_ack(sk); drop: __kfree_skb(skb); @@ -5674,7 +5676,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - tcp_enter_quickack_mode(sk); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); From 96b792d199d17545d6a53faf44b9c91d038f1ab3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 May 2018 15:08:57 -0700 Subject: [PATCH 241/519] tcp: do not aggressively quick ack after ECN events [ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ] ECN signals currently forces TCP to enter quickack mode for up to 16 (TCP_MAX_QUICKACKS) following incoming packets. We believe this is not needed, and only sending one immediate ack for the current packet should be enough. This should reduce the extra load noticed in DCTCP environments, after congestion events. This is part 2 of our effort to reduce pure ACK packets. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5f0ef0b7ef37..3dc61a64022b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -237,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); + tcp_enter_quickack_mode((struct sock *)tp, 1); break; case INET_ECN_CE: if (tcp_ca_needs_ecn((struct sock *)tp)) @@ -245,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS); + tcp_enter_quickack_mode((struct sock *)tp, 1); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; From cd760ab9f4e13aedccc80f19a0b7863d5c0b3c8c Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Mon, 4 Jun 2018 15:29:51 -0700 Subject: [PATCH 242/519] tcp: refactor tcp_ecn_check_ce to remove sk type cast [ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ] Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock* instead of tcp_sock* to clean up type casts. This is a pure refactor patch. Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3dc61a64022b..0754e30abfab 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -228,8 +228,10 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; } -static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) +static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, @@ -237,31 +239,31 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode((struct sock *)tp, 1); + tcp_enter_quickack_mode(sk, 1); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn((struct sock *)tp)) - tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE); + if (tcp_ca_needs_ecn(sk)) + tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode((struct sock *)tp, 1); + tcp_enter_quickack_mode(sk, 1); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn((struct sock *)tp)) - tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE); + if (tcp_ca_needs_ecn(sk)) + tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; } } -static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) +static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) { - if (tp->ecn_flags & TCP_ECN_OK) - __tcp_ecn_check_ce(tp, skb); + if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) + __tcp_ecn_check_ce(sk, skb); } static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) @@ -675,7 +677,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } icsk->icsk_ack.lrcvtime = now; - tcp_ecn_check_ce(tp, skb); + tcp_ecn_check_ce(sk, skb); if (skb->len >= 128) tcp_grow_window(sk, skb); @@ -4366,7 +4368,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) struct sk_buff *skb1; u32 seq, end_seq; - tcp_ecn_check_ce(tp, skb); + tcp_ecn_check_ce(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); From 27a0762cb570834dc44155363c118cabdd024c3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jun 2018 08:47:21 -0700 Subject: [PATCH 243/519] tcp: add one more quick ack after after ECN events [ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ] Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/ tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink about our recent patch removing ~16 quick acks after ECN events. tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent, but in the case the sender cwnd was lowered to 1, we do not want to have a delayed ack for the next packet we will receive. Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events") Signed-off-by: Eric Dumazet Reported-by: Neal Cardwell Cc: Lawrence Brakmo Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0754e30abfab..4a261e078082 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -239,7 +239,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode(sk, 1); + tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: if (tcp_ca_needs_ecn(sk)) @@ -247,7 +247,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode(sk, 1); + tcp_enter_quickack_mode(sk, 2); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; From df30bfccc463082cfc2a5b164e5590403f16af94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2018 20:09:11 -0700 Subject: [PATCH 244/519] inet: frag: enforce memory limits earlier [ Upstream commit 56e2c94f055d328f5f6b0a5c1721cca2f2d4e0a1 ] We currently check current frags memory usage only when a new frag queue is created. This allows attackers to first consume the memory budget (default : 4 MB) creating thousands of frag queues, then sending tiny skbs to exceed high_thresh limit by 2 to 3 order of magnitude. Note that before commit 648700f76b03 ("inet: frags: use rhashtables for reassembly units"), work queue could be starved under DOS, getting no cpu cycles. After commit 648700f76b03, only the per frag queue timer can eventually remove an incomplete frag queue and its skbs. Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Signed-off-by: Eric Dumazet Reported-by: Jann Horn Cc: Florian Westphal Cc: Peter Oskolkov Cc: Paolo Abeni Acked-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b34fa1bb278f..b2001b20e029 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -364,11 +364,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; - if (frag_mem_limit(nf) > nf->high_thresh) { - inet_frag_schedule_worker(f); - return NULL; - } - q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; @@ -405,6 +400,11 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; int depth = 0; + if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { + inet_frag_schedule_worker(f); + return NULL; + } + if (frag_mem_limit(nf) > nf->low_thresh) inet_frag_schedule_worker(f); From b5fef54e3212e2ec1449a4445becb25208c9410c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 31 Jul 2018 17:12:52 -0700 Subject: [PATCH 245/519] net: dsa: Do not suspend/resume closed slave_dev [ Upstream commit a94c689e6c9e72e722f28339e12dff191ee5a265 ] If a DSA slave network device was previously disabled, there is no need to suspend or resume it. Fixes: 2446254915a7 ("net: dsa: allow switch drivers to implement suspend/resume hooks") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/slave.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 554c2a961ad5..48b28a7ecc7a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1099,6 +1099,9 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_detach(slave_dev); if (p->phy) { @@ -1116,6 +1119,9 @@ int dsa_slave_resume(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_attach(slave_dev); if (p->phy) { From 8cac0ce0a8853cae7dc01256f88bc9b7e53ad3ce Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 21:13:16 +0000 Subject: [PATCH 246/519] netlink: Fix spectre v1 gadget in netlink_create() [ Upstream commit bc5b6c0b62b932626a135f516a41838c510c6eba ] 'protocol' is a user-controlled value, so sanitize it after the bounds check to avoid using it for speculative out-of-bounds access to arrays indexed by it. This addresses the following accesses detected with the help of smatch: * net/netlink/af_netlink.c:654 __netlink_create() warn: potential spectre issue 'nlk_cb_mutex_keys' [w] * net/netlink/af_netlink.c:654 __netlink_create() warn: potential spectre issue 'nlk_cb_mutex_key_strings' [w] * net/netlink/af_netlink.c:685 netlink_create() warn: potential spectre issue 'nl_table' [w] (local cap) Cc: Josh Poimboeuf Signed-off-by: Jeremy Cline Reviewed-by: Josh Poimboeuf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 818400fddc9b..9708fff318d5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -654,6 +655,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; + protocol = array_index_nospec(protocol, MAX_LINKS); netlink_lock_table(); #ifdef CONFIG_MODULES From dac2939e629e092b9c65a6242f1b1c018e811dc8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Jul 2018 14:27:15 -0700 Subject: [PATCH 247/519] squashfs: more metadata hardening commit d512584780d3e6a7cacb2f482834849453d444a1 upstream. Anatoly reports another squashfs fuzzing issue, where the decompression parameters themselves are in a compressed block. This causes squashfs_read_data() to be called in order to read the decompression options before the decompression stream having been set up, making squashfs go sideways. Reported-by: Anatoly Trosinenko Acked-by: Phillip Lougher Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 0cea9b9236d0..82bc942fc437 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -166,6 +166,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, } if (compressed) { + if (!msblk->stream) + goto read_failure; length = squashfs_decompress(msblk, bh, b, offset, length, output); if (length < 0) From 581c2941840f0a37aec39a3e8f4066fd86a8e61b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Aug 2018 08:43:35 -0700 Subject: [PATCH 248/519] squashfs: more metadata hardenings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 71755ee5350b63fb1f283de8561cdb61b47f4d1d upstream. The squashfs fragment reading code doesn't actually verify that the fragment is inside the fragment table. The end result _is_ verified to be inside the image when actually reading the fragment data, but before that is done, we may end up taking a page fault because the fragment table itself might not even exist. Another report from Anatoly and his endless squashfs image fuzzing. Reported-by: Анатолий Тросиненко Acked-by:: Phillip Lougher , Cc: Willy Tarreau Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/fragment.c | 13 +++++++++---- fs/squashfs/squashfs_fs_sb.h | 1 + fs/squashfs/super.c | 5 +++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 86ad9a4b8c36..0681feab4a84 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int block = SQUASHFS_FRAGMENT_INDEX(fragment); - int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); - u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + int block, offset, size; struct squashfs_fragment_entry fragment_entry; - int size; + u64 start_block; + + if (fragment >= msblk->fragments) + return -EIO; + block = SQUASHFS_FRAGMENT_INDEX(fragment); + offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + + start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..ef69c31947bf 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -75,6 +75,7 @@ struct squashfs_sb_info { unsigned short block_log; long long bytes_used; unsigned int inodes; + unsigned int fragments; int xattr_ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 5056babe00df..93aa3e23c845 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -176,6 +176,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); + msblk->fragments = le32_to_cpu(sblk->fragments); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b)); @@ -186,7 +187,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); - TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); @@ -273,7 +274,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_export_op = &squashfs_export_ops; handle_fragments: - fragments = le32_to_cpu(sblk->fragments); + fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; From 7dfa787508f9150cd7527e06b9216415dcf941f2 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 18:50:42 +0300 Subject: [PATCH 249/519] can: ems_usb: Fix memory leak on ems_usb_disconnect() commit 72c05f32f4a5055c9c8fe889bb6903ec959c0aad upstream. ems_usb_probe() allocates memory for dev->tx_msg_buffer, but there is no its deallocation in ems_usb_disconnect(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/ems_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 357c9e89fdf9..047348033e27 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -1078,6 +1078,7 @@ static void ems_usb_disconnect(struct usb_interface *intf) usb_free_urb(dev->intr_urb); kfree(dev->intr_in_buffer); + kfree(dev->tx_msg_buffer); } } From d856749a77546f033d9a41cc681ed3a58dba18e9 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 27 Jul 2018 22:43:01 +0000 Subject: [PATCH 250/519] net: socket: fix potential spectre v1 gadget in socketcall commit c8e8cd579bb4265651df8223730105341e61a2d1 upstream. 'call' is a user-controlled value, so sanitize the array index after the bounds check to avoid speculating past the bounds of the 'nargs' array. Found with the help of Smatch: net/socket.c:2508 __do_sys_socketcall() warn: potential spectre issue 'nargs' [r] (local cap) Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 5b31e5baf3b5..0c544ae48eac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -2324,6 +2325,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; + call = array_index_nospec(call, SYS_SENDMMSG + 1); len = nargs[call]; if (len > sizeof(a)) From a1d7ff2496e04bc1b8c3d8cb1ad717b6256098a8 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 18 Jul 2018 10:29:28 +0800 Subject: [PATCH 251/519] virtio_balloon: fix another race between migration and ballooning commit 89da619bc18d79bca5304724c11d4ba3b67ce2c6 upstream. Kernel panic when with high memory pressure, calltrace looks like, PID: 21439 TASK: ffff881be3afedd0 CPU: 16 COMMAND: "java" #0 [ffff881ec7ed7630] machine_kexec at ffffffff81059beb #1 [ffff881ec7ed7690] __crash_kexec at ffffffff81105942 #2 [ffff881ec7ed7760] crash_kexec at ffffffff81105a30 #3 [ffff881ec7ed7778] oops_end at ffffffff816902c8 #4 [ffff881ec7ed77a0] no_context at ffffffff8167ff46 #5 [ffff881ec7ed77f0] __bad_area_nosemaphore at ffffffff8167ffdc #6 [ffff881ec7ed7838] __node_set at ffffffff81680300 #7 [ffff881ec7ed7860] __do_page_fault at ffffffff8169320f #8 [ffff881ec7ed78c0] do_page_fault at ffffffff816932b5 #9 [ffff881ec7ed78f0] page_fault at ffffffff8168f4c8 [exception RIP: _raw_spin_lock_irqsave+47] RIP: ffffffff8168edef RSP: ffff881ec7ed79a8 RFLAGS: 00010046 RAX: 0000000000000246 RBX: ffffea0019740d00 RCX: ffff881ec7ed7fd8 RDX: 0000000000020000 RSI: 0000000000000016 RDI: 0000000000000008 RBP: ffff881ec7ed79a8 R8: 0000000000000246 R9: 000000000001a098 R10: ffff88107ffda000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000008 R14: ffff881ec7ed7a80 R15: ffff881be3afedd0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 It happens in the pagefault and results in double pagefault during compacting pages when memory allocation fails. Analysed the vmcore, the page leads to second pagefault is corrupted with _mapcount=-256, but private=0. It's caused by the race between migration and ballooning, and lock missing in virtballoon_migratepage() of virtio_balloon driver. This patch fix the bug. Fixes: e22504296d4f64f ("virtio_balloon: introduce migration primitives to balloon pages") Cc: stable@vger.kernel.org Signed-off-by: Jiang Biao Signed-off-by: Huang Chong Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7cf26768ea0b..cbe9e2295752 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -479,7 +479,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, tell_host(vb, vb->inflate_vq); /* balloon's page migration 2nd step -- deflate "page" */ + spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_delete(page); + spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; set_page_pfns(vb, vb->pfns, page); tell_host(vb, vb->deflate_vq); From 314b46558cc97578f2edf5e77a65140b63db3fcc Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 19 Jul 2018 21:59:07 +0300 Subject: [PATCH 252/519] kvm: x86: vmx: fix vpid leak commit 63aff65573d73eb8dda4732ad4ef222dd35e4862 upstream. VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested vmx is turned on with the module parameter. However, it's only freed if the L1 guest has executed VMXON which is not a given. As a result, on a system with nested==on every creation+deletion of an L1 vcpu without running an L2 guest results in leaking one vpid. Since the total number of vpids is limited to 64k, they can eventually get exhausted, preventing L2 from starting. Delay allocation of the L2 vpid until VMXON emulation, thus matching its freeing. Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0 Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 18143886b186..c5a4b1978cbf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6843,6 +6843,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; + vmx->nested.vpid02 = allocate_vpid(); + vmx->nested.vmxon = true; skip_emulated_instruction(vcpu); @@ -8887,10 +8889,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (nested) { + if (nested) nested_vmx_setup_ctls_msrs(vmx); - vmx->nested.vpid02 = allocate_vpid(); - } vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -8899,7 +8899,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return &vmx->vcpu; free_vmcs: - free_vpid(vmx->nested.vpid02); free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); From 3ef726179c2bbedbf80ab43f8e57f9582865b7b5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Jul 2018 16:12:32 +0800 Subject: [PATCH 253/519] crypto: padlock-aes - Fix Nano workaround data corruption commit 46d8c4b28652d35dc6cfb5adf7f54e102fc04384 upstream. This was detected by the self-test thanks to Ard's chunking patch. I finally got around to testing this out on my ancient Via box. It turns out that the workaround got the assembly wrong and we end up doing count + initial cycles of the loop instead of just count. This obviously causes corruption, either by overwriting the source that is yet to be processed, or writing over the end of the buffer. On CPUs that don't require the workaround only ECB is affected. On Nano CPUs both ECB and CBC are affected. This patch fixes it by doing the subtraction prior to the assembly. Fixes: a76c1c23d0c3 ("crypto: padlock-aes - work around Nano CPU...") Cc: Reported-by: Jamie Heilman Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/padlock-aes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 97a364694bfc..047ef69b7e65 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, return; } + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) @@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) - : "d"(control_word), "b"(key), "c"(count - initial)); + : "d"(control_word), "b"(key), "c"(count)); } static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, @@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, if (count < cbc_fetch_blocks) return cbc_crypt(input, output, key, iv, control_word, count); + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) @@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) - : "d" (control_word), "b" (key), "c" (count-initial)); + : "d" (control_word), "b" (key), "c" (count)); return iv; } From 6ff21107ba21e59a3831825a55e882afc8a1b3f1 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 12 Jul 2018 16:30:45 -0400 Subject: [PATCH 254/519] scsi: sg: fix minor memory leak in error path commit c170e5a8d222537e98aa8d4fddb667ff7a2ee114 upstream. Fix a minor memory leak when there is an error opening a /dev/sg device. Fixes: cc833acbee9d ("sg: O_EXCL and other lock handling") Cc: Reviewed-by: Ewan D. Milne Signed-off-by: Tony Battersby Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4302880a20b3..e1639e80db53 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2195,6 +2195,7 @@ sg_add_sfp(Sg_device * sdp) write_lock_irqsave(&sdp->sfd_lock, iflags); if (atomic_read(&sdp->detaching)) { write_unlock_irqrestore(&sdp->sfd_lock, iflags); + kfree(sfp); return ERR_PTR(-ENODEV); } list_add_tail(&sfp->sfd_siblings, &sdp->sfds); From bffa1e42b3713aa7911cc3f9a6e5a2dbbf1dc789 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 6 Aug 2018 16:24:42 +0200 Subject: [PATCH 255/519] Linux 4.4.146 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index be31491a2d67..030f5af05f4e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 145 +SUBLEVEL = 146 EXTRAVERSION = NAME = Blurry Fish Butt From 760fed99e2bfcf0c4c7b1880ebbe19fdb4d9738a Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 18 Jul 2018 14:29:54 -0700 Subject: [PATCH 256/519] scsi: qla2xxx: Fix ISP recovery on unload commit b08abbd9f5996309f021684f9ca74da30dcca36a upstream. During unload process, the chip can encounter problem where a FW dump would be captured. For this case, the full reset sequence will be skip to bring the chip back to full operational state. Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5cbf20ab94aa..18b19744398a 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4938,8 +4938,9 @@ qla2x00_do_dpc(void *data) } } - if (test_and_clear_bit(ISP_ABORT_NEEDED, - &base_vha->dpc_flags)) { + if (test_and_clear_bit + (ISP_ABORT_NEEDED, &base_vha->dpc_flags) && + !test_bit(UNLOADING, &base_vha->dpc_flags)) { ql_dbg(ql_dbg_dpc, base_vha, 0x4007, "ISP abort scheduled.\n"); From 468926f8dbb747b2612459113a926be404c9a6d6 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Wed, 18 Jul 2018 14:29:55 -0700 Subject: [PATCH 257/519] scsi: qla2xxx: Return error when TMF returns commit b4146c4929ef61d5afca011474d59d0918a0cd82 upstream. Propagate the task management completion status properly to avoid unnecessary waits for commands to complete. Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling") Cc: Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a9eb3cd453be..41a646696bab 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -325,11 +325,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, wait_for_completion(&tm_iocb->u.tmf.comp); - rval = tm_iocb->u.tmf.comp_status == CS_COMPLETE ? - QLA_SUCCESS : QLA_FUNCTION_FAILED; + rval = tm_iocb->u.tmf.data; - if ((rval != QLA_SUCCESS) || tm_iocb->u.tmf.data) { - ql_dbg(ql_dbg_taskm, vha, 0x8030, + if (rval != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x8030, "TM IOCB failed (%x).\n", rval); } From 09a0de491c5ec0e40f8f9d21b13af306667360f4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Aug 2018 14:44:59 +0200 Subject: [PATCH 258/519] genirq: Make force irq threading setup more robust commit d1f0301b3333eef5efbfa1fe0f0edbea01863d5d upstream. The support of force threading interrupts which are set up with both a primary and a threaded handler wreckaged the setup of regular requested threaded interrupts (primary handler == NULL). The reason is that it does not check whether the primary handler is set to the default handler which wakes the handler thread. Instead it replaces the thread handler with the primary handler as it would do with force threaded interrupts which have been requested via request_irq(). So both the primary and the thread handler become the same which then triggers the warnon that the thread handler tries to wakeup a not configured secondary thread. Fortunately this only happens when the driver omits the IRQF_ONESHOT flag when requesting the threaded interrupt, which is normaly caught by the sanity checks when force irq threading is disabled. Fix it by skipping the force threading setup when a regular threaded interrupt is requested. As a consequence the interrupt request which lacks the IRQ_ONESHOT flag is rejected correctly instead of silently wreckaging it. Fixes: 2a1d3ab8986d ("genirq: Handle force threading of irqs with primary and thread handler") Reported-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Tested-by: Kurt Kanzenbach Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 5f55a8bf5264..0df2b44dac7c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1012,6 +1012,13 @@ static int irq_setup_forced_threading(struct irqaction *new) if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) return 0; + /* + * No further action required for interrupts which are requested as + * threaded interrupts already + */ + if (new->handler == irq_default_primary_handler) + return 0; + new->flags |= IRQF_ONESHOT; /* @@ -1019,7 +1026,7 @@ static int irq_setup_forced_threading(struct irqaction *new) * thread handler. We force thread them as well by creating a * secondary action. */ - if (new->handler != irq_default_primary_handler && new->thread_fn) { + if (new->handler && new->thread_fn) { /* Allocate the secondary action */ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!new->secondary) From 0a022859e2304b21a6cd620c2eb008cc60de0ab8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 31 Jul 2018 18:13:58 +0200 Subject: [PATCH 259/519] nohz: Fix local_timer_softirq_pending() commit 80d20d35af1edd632a5e7a3b9c0ab7ceff92769e upstream. local_timer_softirq_pending() checks whether the timer softirq is pending with: local_softirq_pending() & TIMER_SOFTIRQ. This is wrong because TIMER_SOFTIRQ is the softirq number and not a bitmask. So the test checks for the wrong bit. Use BIT(TIMER_SOFTIRQ) instead. Fixes: 5d62c183f9e9 ("nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()") Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Reviewed-by: Daniel Bristot de Oliveira Acked-by: Frederic Weisbecker Cc: bigeasy@linutronix.de Cc: peterz@infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180731161358.29472-1-anna-maria@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e5d228f7224c..5ad2e852e9f6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -570,7 +570,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & TIMER_SOFTIRQ; + return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, From 52296ab92b66e2a8c932b2a4cb318dd08cb8bd29 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 27 Jul 2018 16:54:44 +0100 Subject: [PATCH 260/519] netlink: Do not subscribe to non-existent groups [ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ] Make ABI more strict about subscribing to group > ngroups. Code doesn't check for that and it looks bogus. (one can subscribe to non-existing group) Still, it's possible to bind() to all possible groups with (-1) Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9708fff318d5..aed2cfb1a4fe 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -985,6 +985,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (err) return err; } + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { From bc48f46f117e05af226a4c0dff617218f1c13650 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 30 Jul 2018 18:32:36 +0100 Subject: [PATCH 261/519] netlink: Don't shift with UB on nlk->ngroups [ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ] On i386 nlk->ngroups might be 32 or 0. Which leads to UB, resulting in hang during boot. Check for 0 ngroups and use (unsigned long long) as a type to shift. Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups"). Reported-by: kernel test robot Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index aed2cfb1a4fe..87c25918c073 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -985,7 +985,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (err) return err; } - groups &= (1UL << nlk->ngroups) - 1; + + if (nlk->ngroups == 0) + groups = 0; + else + groups &= (1ULL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { From a5928d68418768e3e7ed9c75039060c1e70e047e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sun, 5 Aug 2018 01:35:53 +0100 Subject: [PATCH 262/519] netlink: Don't shift on 64 for ngroups commit 91874ecf32e41b5d86a4cb9d60e0bee50d828058 upstream. It's legal to have 64 groups for netlink_sock. As user-supplied nladdr->nl_groups is __u32, it's possible to subscribe only to first 32 groups. The check for correctness of .bind() userspace supplied parameter is done by applying mask made from ngroups shift. Which broke Android as they have 64 groups and the shift for mask resulted in an overflow. Fixes: 61f4b23769f0 ("netlink: Don't shift with UB on nlk->ngroups") Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Reported-and-Tested-by: Nathan Chancellor Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 87c25918c073..bf292010760a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -988,8 +988,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups == 0) groups = 0; - else - groups &= (1ULL << nlk->ngroups) - 1; + else if (nlk->ngroups < 8*sizeof(groups)) + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { From 310eba0dfc8a7d5423516df7f4be7451505ac6ef Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 8 Jul 2018 19:35:02 -0400 Subject: [PATCH 263/519] ext4: fix false negatives *and* false positives in ext4_check_descriptors() commit 44de022c4382541cebdd6de4465d1f4f465ff1dd upstream. Ext4_check_descriptors() was getting called before s_gdb_count was initialized. So for file systems w/o the meta_bg feature, allocation bitmaps could overlap the block group descriptors and ext4 wouldn't notice. For file systems with the meta_bg feature enabled, there was a fencepost error which would cause the ext4_check_descriptors() to incorrectly believe that the block allocation bitmap overlaps with the block group descriptor blocks, and it would reject the mount. Fix both of these problems. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Benjamin Gilbert Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 49af3c50b263..3e4d8ac1974e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2102,7 +2102,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; - ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0); ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -3777,13 +3777,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } + sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ret = -EFSCORRUPTED; goto failed_mount2; } - sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); From fef9866d278ee726b15cf251339660e77ba5488c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 14 Sep 2017 16:50:14 +0200 Subject: [PATCH 264/519] ACPI / PCI: Bail early in acpi_pci_add_bus() if there is no ACPI handle commit a0040c0145945d3bd203df8fa97f6dfa819f3f7d upstream. Hyper-V instances support PCI pass-through which is implemented through PV pci-hyperv driver. When a device is passed through, a new root PCI bus is created in the guest. The bus sits on top of VMBus and has no associated information in ACPI. acpi_pci_add_bus() in this case proceeds all the way to acpi_evaluate_dsm(), which reports ACPI: \: failed to evaluate _DSM (0x1001) While acpi_pci_slot_enumerate() and acpiphp_enumerate_slots() are protected against ACPI_HANDLE() being NULL and do nothing, acpi_evaluate_dsm() is not and gives us the error. It seems the correct fix is to not do anything in acpi_pci_add_bus() in such cases. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Bjorn Helgaas Cc: Sinan Kaya Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a32ba753e413..afaf13474796 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -543,7 +543,7 @@ void acpi_pci_add_bus(struct pci_bus *bus) union acpi_object *obj; struct pci_host_bridge *bridge; - if (acpi_pci_disabled || !bus->bridge) + if (acpi_pci_disabled || !bus->bridge || !ACPI_HANDLE(bus->bridge)) return; acpi_pci_slot_enumerate(bus); From 731ccd90b8dc6697fefb62f43ed6f8d253d7fd5b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 14 Jul 2018 01:28:15 +0900 Subject: [PATCH 265/519] ring_buffer: tracing: Inherit the tracing setting to next ring buffer commit 73c8d8945505acdcbae137c2e00a1232e0be709f upstream. Maintain the tracing on/off setting of the ring_buffer when switching to the trace buffer snapshot. Taking a snapshot is done by swapping the backup ring buffer (max_tr_buffer). But since the tracing on/off setting is defined by the ring buffer, when swapping it, the tracing on/off setting can also be changed. This causes a strange result like below: /sys/kernel/debug/tracing # cat tracing_on 1 /sys/kernel/debug/tracing # echo 0 > tracing_on /sys/kernel/debug/tracing # cat tracing_on 0 /sys/kernel/debug/tracing # echo 1 > snapshot /sys/kernel/debug/tracing # cat tracing_on 1 /sys/kernel/debug/tracing # echo 1 > snapshot /sys/kernel/debug/tracing # cat tracing_on 0 We don't touch tracing_on, but snapshot changes tracing_on setting each time. This is an anomaly, because user doesn't know that each "ring_buffer" stores its own tracing-enable state and the snapshot is done by swapping ring buffers. Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox Cc: Ingo Molnar Cc: Shuah Khan Cc: Tom Zanussi Cc: Hiraku Toyooka Cc: stable@vger.kernel.org Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace") Signed-off-by: Masami Hiramatsu [ Updated commit log and comment in the code ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 16 ++++++++++++++++ kernel/trace/trace.c | 6 ++++++ 3 files changed, 23 insertions(+) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 4acc552e9279..19d0778ec382 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -162,6 +162,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer); void ring_buffer_record_off(struct ring_buffer *buffer); void ring_buffer_record_on(struct ring_buffer *buffer); int ring_buffer_record_is_on(struct ring_buffer *buffer); +int ring_buffer_record_is_set_on(struct ring_buffer *buffer); void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d9cd6191760b..fdaa88f38aec 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3141,6 +3141,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer) return !atomic_read(&buffer->record_disabled); } +/** + * ring_buffer_record_is_set_on - return true if the ring buffer is set writable + * @buffer: The ring buffer to see if write is set enabled + * + * Returns true if the ring buffer is set writable by ring_buffer_record_on(). + * Note that this does NOT mean it is in a writable state. + * + * It may return true when the ring buffer has been disabled by + * ring_buffer_record_disable(), as that is a temporary disabling of + * the ring buffer. + */ +int ring_buffer_record_is_set_on(struct ring_buffer *buffer) +{ + return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); +} + /** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8aef4e63ac57..1b980a8ef791 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1088,6 +1088,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); + /* Inherit the recordable setting from trace_buffer */ + if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + ring_buffer_record_on(tr->max_buffer.buffer); + else + ring_buffer_record_off(tr->max_buffer.buffer); + buf = tr->trace_buffer.buffer; tr->trace_buffer.buffer = tr->max_buffer.buffer; tr->max_buffer.buffer = buf; From a8ec97dbac9027f2f4158aadf86010edc2a9ea5d Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Mon, 9 Jul 2018 11:43:01 +0200 Subject: [PATCH 266/519] i2c: imx: Fix reinit_completion() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff upstream. Make sure to call reinit_completion() before dma is started to avoid race condition where reinit_completion() is called after complete() and before wait_for_completion_timeout(). Signed-off-by: Esben Haabendal Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver") Reviewed-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index d4d853680ae4..a4abf7dc9576 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -382,6 +382,7 @@ static int i2c_imx_dma_xfer(struct imx_i2c_struct *i2c_imx, goto err_desc; } + reinit_completion(&dma->cmd_complete); txdesc->callback = i2c_imx_dma_callback; txdesc->callback_param = i2c_imx; if (dma_submit_error(dmaengine_submit(txdesc))) { @@ -631,7 +632,6 @@ static int i2c_imx_dma_write(struct imx_i2c_struct *i2c_imx, * The first byte must be transmitted by the CPU. */ imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR); - reinit_completion(&i2c_imx->dma->cmd_complete); time_left = wait_for_completion_timeout( &i2c_imx->dma->cmd_complete, msecs_to_jiffies(DMA_TIMEOUT)); @@ -690,7 +690,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, if (result) return result; - reinit_completion(&i2c_imx->dma->cmd_complete); time_left = wait_for_completion_timeout( &i2c_imx->dma->cmd_complete, msecs_to_jiffies(DMA_TIMEOUT)); From 0749d5b3ec62310b747751ea7d4d5ccca51bc80f Mon Sep 17 00:00:00 2001 From: Shankara Pailoor Date: Tue, 5 Jun 2018 08:33:27 -0500 Subject: [PATCH 267/519] jfs: Fix inconsistency between memory allocation and ea_buf->max_size commit 92d34134193e5b129dc24f8d79cb9196626e8d7a upstream. The code is assuming the buffer is max_size length, but we weren't allocating enough space for it. Signed-off-by: Shankara Pailoor Signed-off-by: Dave Kleikamp Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/jfs/xattr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 48b15a6e5558..40a26a542341 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -493,15 +493,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (size > PSIZE) { /* * To keep the rest of the code simple. Allocate a - * contiguous buffer to work with + * contiguous buffer to work with. Make the buffer large + * enough to make use of the whole extent. */ - ea_buf->xattr = kmalloc(size, GFP_KERNEL); + ea_buf->max_size = (size + sb->s_blocksize - 1) & + ~(sb->s_blocksize - 1); + + ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL); if (ea_buf->xattr == NULL) return -ENOMEM; ea_buf->flag = EA_MALLOC; - ea_buf->max_size = (size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); if (ea_size == 0) return 0; From 8404ae6c8c9ff23a06cf38112e83002e1088bfe1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Aug 2018 12:19:28 +0200 Subject: [PATCH 268/519] Linux 4.4.147 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 030f5af05f4e..ee92a12e3a4b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 146 +SUBLEVEL = 147 EXTRAVERSION = NAME = Blurry Fish Butt From 7736fcede789b412ae1c5c2f12f9bef58903319c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 28 Jul 2018 08:12:04 -0400 Subject: [PATCH 269/519] ext4: fix check to prevent initializing reserved inodes commit 5012284700775a4e6e3fbe7eac4c543c4874b559 upstream. Commit 8844618d8aa7: "ext4: only look at the bg_flags field if it is valid" will complain if block group zero does not have the EXT4_BG_INODE_ZEROED flag set. Unfortunately, this is not correct, since a freshly created file system has this flag cleared. It gets almost immediately after the file system is mounted read-write --- but the following somewhat unlikely sequence will end up triggering a false positive report of a corrupted file system: mkfs.ext4 /dev/vdc mount -o ro /dev/vdc /vdc mount -o remount,rw /dev/vdc Instead, when initializing the inode table for block group zero, test to make sure that itable_unused count is not too large, since that is the case that will result in some or all of the reserved inodes getting cleared. This fixes the failures reported by Eric Whiteney when running generic/230 and generic/231 in the the nojournal test case. Fixes: 8844618d8aa7 ("ext4: only look at the bg_flags field if it is valid") Reported-by: Eric Whitney Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ialloc.c | 5 ++++- fs/ext4/super.c | 8 +------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 041117fd8fd7..0963213e9cd3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1308,7 +1308,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ext4_itable_unused_count(sb, gdp)), sbi->s_inodes_per_block); - if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { + if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) || + ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp)) < + EXT4_FIRST_INO(sb)))) { ext4_error(sb, "Something is wrong with group %u: " "used itable blocks: %d; " "itable unused count: %u", diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e4d8ac1974e..8d18f6142da5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2875,14 +2875,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) if (!gdp) continue; - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) - continue; - if (group != 0) + if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) break; - ext4_error(sb, "Inode table for bg 0 marked as " - "needing zeroing"); - if (sb->s_flags & MS_RDONLY) - return ngroups; } return group; From 215f36e128f2b476cd3bfe91339a5e12b79d010c Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 22 May 2018 14:37:18 -0700 Subject: [PATCH 270/519] tpm: fix race condition in tpm_common_write() commit 3ab2011ea368ec3433ad49e1b9e1c7b70d2e65df upstream. There is a race condition in tpm_common_write function allowing two threads on the same /dev/tpm, or two different applications on the same /dev/tpmrm to overwrite each other commands/responses. Fixed this by taking the priv->buffer_mutex early in the function. Also converted the priv->data_pending from atomic to a regular size_t type. There is no need for it to be atomic since it is only touched under the protection of the priv->buffer_mutex. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-dev.c | 43 ++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index 912ad30be585..4719aa781bf2 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -25,7 +25,7 @@ struct file_priv { struct tpm_chip *chip; /* Data passed to and from the tpm via the read/write calls */ - atomic_t data_pending; + size_t data_pending; struct mutex buffer_mutex; struct timer_list user_read_timer; /* user needs to claim result */ @@ -46,7 +46,7 @@ static void timeout_work(struct work_struct *work) struct file_priv *priv = container_of(work, struct file_priv, work); mutex_lock(&priv->buffer_mutex); - atomic_set(&priv->data_pending, 0); + priv->data_pending = 0; memset(priv->data_buffer, 0, sizeof(priv->data_buffer)); mutex_unlock(&priv->buffer_mutex); } @@ -72,7 +72,6 @@ static int tpm_open(struct inode *inode, struct file *file) } priv->chip = chip; - atomic_set(&priv->data_pending, 0); mutex_init(&priv->buffer_mutex); setup_timer(&priv->user_read_timer, user_reader_timeout, (unsigned long)priv); @@ -86,28 +85,24 @@ static ssize_t tpm_read(struct file *file, char __user *buf, size_t size, loff_t *off) { struct file_priv *priv = file->private_data; - ssize_t ret_size; + ssize_t ret_size = 0; int rc; del_singleshot_timer_sync(&priv->user_read_timer); flush_work(&priv->work); - ret_size = atomic_read(&priv->data_pending); - if (ret_size > 0) { /* relay data */ - ssize_t orig_ret_size = ret_size; - if (size < ret_size) - ret_size = size; + mutex_lock(&priv->buffer_mutex); - mutex_lock(&priv->buffer_mutex); + if (priv->data_pending) { + ret_size = min_t(ssize_t, size, priv->data_pending); rc = copy_to_user(buf, priv->data_buffer, ret_size); - memset(priv->data_buffer, 0, orig_ret_size); + memset(priv->data_buffer, 0, priv->data_pending); if (rc) ret_size = -EFAULT; - mutex_unlock(&priv->buffer_mutex); + priv->data_pending = 0; } - atomic_set(&priv->data_pending, 0); - + mutex_unlock(&priv->buffer_mutex); return ret_size; } @@ -118,18 +113,20 @@ static ssize_t tpm_write(struct file *file, const char __user *buf, size_t in_size = size; ssize_t out_size; - /* cannot perform a write until the read has cleared - either via tpm_read or a user_read_timer timeout. - This also prevents splitted buffered writes from blocking here. - */ - if (atomic_read(&priv->data_pending) != 0) - return -EBUSY; - if (in_size > TPM_BUFSIZE) return -E2BIG; mutex_lock(&priv->buffer_mutex); + /* Cannot perform a write until the read has cleared either via + * tpm_read or a user_read_timer timeout. This also prevents split + * buffered writes from blocking here. + */ + if (priv->data_pending != 0) { + mutex_unlock(&priv->buffer_mutex); + return -EBUSY; + } + if (copy_from_user (priv->data_buffer, (void __user *) buf, in_size)) { mutex_unlock(&priv->buffer_mutex); @@ -153,7 +150,7 @@ static ssize_t tpm_write(struct file *file, const char __user *buf, return out_size; } - atomic_set(&priv->data_pending, out_size); + priv->data_pending = out_size; mutex_unlock(&priv->buffer_mutex); /* Set a timeout by which the reader must come claim the result */ @@ -172,7 +169,7 @@ static int tpm_release(struct inode *inode, struct file *file) del_singleshot_timer_sync(&priv->user_read_timer); flush_work(&priv->work); file->private_data = NULL; - atomic_set(&priv->data_pending, 0); + priv->data_pending = 0; clear_bit(0, &priv->chip->is_open); kfree(priv); return 0; From e424bee248c38266c6057d43f3e350072fc41c5d Mon Sep 17 00:00:00 2001 From: Thomas Egerer Date: Mon, 25 Jan 2016 12:58:44 +0100 Subject: [PATCH 271/519] ipv4+ipv6: Make INET*_ESP select CRYPTO_ECHAINIV commit 32b6170ca59ccf07d0e394561e54b2cd9726038c upstream. The ESP algorithms using CBC mode require echainiv. Hence INET*_ESP have to select CRYPTO_ECHAINIV in order to work properly. This solves the issues caused by a misconfiguration as described in [1]. The original approach, patching crypto/Kconfig was turned down by Herbert Xu [2]. [1] https://lists.strongswan.org/pipermail/users/2015-December/009074.html [2] http://marc.info/?l=linux-crypto-vger&m=145224655809562&w=2 Signed-off-by: Thomas Egerer Acked-by: Herbert Xu Signed-off-by: David S. Miller Cc: Yongqin Liu Signed-off-by: Greg Kroah-Hartman --- net/ipv4/Kconfig | 1 + net/ipv6/Kconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 93581bba8643..09d6c4a6b53d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -354,6 +354,7 @@ config INET_ESP select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES + select CRYPTO_ECHAINIV ---help--- Support for IPsec ESP. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 851d5c9e3ecc..0f50248bad17 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -69,6 +69,7 @@ config INET6_ESP select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES + select CRYPTO_ECHAINIV ---help--- Support for IPsec ESP. From 1e4006421429ab672c62ab25afb3c39e6f4aa94f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 20 Apr 2018 14:55:31 -0700 Subject: [PATCH 272/519] fork: unconditionally clear stack on fork commit e01e80634ecdde1dd113ac43b3adad21b47f3957 upstream. One of the classes of kernel stack content leaks[1] is exposing the contents of prior heap or stack contents when a new process stack is allocated. Normally, those stacks are not zeroed, and the old contents remain in place. In the face of stack content exposure flaws, those contents can leak to userspace. Fixing this will make the kernel no longer vulnerable to these flaws, as the stack will be wiped each time a stack is assigned to a new process. There's not a meaningful change in runtime performance; it almost looks like it provides a benefit. Performing back-to-back kernel builds before: Run times: 157.86 157.09 158.90 160.94 160.80 Mean: 159.12 Std Dev: 1.54 and after: Run times: 159.31 157.34 156.71 158.15 160.81 Mean: 158.46 Std Dev: 1.46 Instead of making this a build or runtime config, Andy Lutomirski recommended this just be enabled by default. [1] A noisy search for many kinds of stack content leaks can be seen here: https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=linux+kernel+stack+leak I did some more with perf and cycle counts on running 100,000 execs of /bin/true. before: Cycles: 218858861551 218853036130 214727610969 227656844122 224980542841 Mean: 221015379122.60 Std Dev: 4662486552.47 after: Cycles: 213868945060 213119275204 211820169456 224426673259 225489986348 Mean: 217745009865.40 Std Dev: 5935559279.99 It continues to look like it's faster, though the deviation is rather wide, but I'm not sure what I could do that would be less noisy. I'm open to ideas! Link: http://lkml.kernel.org/r/20180221021659.GA37073@beast Signed-off-by: Kees Cook Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Andy Lutomirski Cc: Laura Abbott Cc: Rasmus Villemoes Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [ Srivatsa: Backported to 4.4.y ] Signed-off-by: Srivatsa S. Bhat Reviewed-by: Srinidhi Rao Signed-off-by: Greg Kroah-Hartman --- include/linux/thread_info.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ff307b548ed3..646891f3bc1e 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -55,11 +55,7 @@ extern long do_no_restart_syscall(struct restart_block *parm); #ifdef __KERNEL__ -#ifdef CONFIG_DEBUG_STACK_USAGE -# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) -#else -# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) -#endif +#define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* * flag set/clear/test wrappers From a9252a70174362912fee1556f8c3a25d66cd7637 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 28 Jul 2018 11:47:17 +0200 Subject: [PATCH 273/519] parisc: Enable CONFIG_MLONGCALLS by default commit 66509a276c8c1d19ee3f661a41b418d101c57d29 upstream. Enable the -mlong-calls compiler option by default, because otherwise in most cases linking the vmlinux binary fails due to truncations of R_PARISC_PCREL22F relocations. This fixes building the 64-bit defconfig. Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 729f89163bc3..210b3d675261 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -177,7 +177,7 @@ config PREFETCH config MLONGCALLS bool "Enable the -mlong-calls compiler option for big kernels" - def_bool y if (!MODULES) + default y depends on PA8X00 help If you configure the kernel to include many drivers built-in instead From 277b161b1a1d339985b4c24e796e86eae9511382 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 5 Aug 2018 13:30:31 -0400 Subject: [PATCH 274/519] parisc: Define mb() and add memory barriers to assembler unlock sequences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fedb8da96355f5f64353625bf96dc69423ad1826 upstream. For years I thought all parisc machines executed loads and stores in order. However, Jeff Law recently indicated on gcc-patches that this is not correct. There are various degrees of out-of-order execution all the way back to the PA7xxx processor series (hit-under-miss). The PA8xxx series has full out-of-order execution for both integer operations, and loads and stores. This is described in the following article: http://web.archive.org/web/20040214092531/http://www.cpus.hp.com/technical_references/advperf.shtml For this reason, we need to define mb() and to insert a memory barrier before the store unlocking spinlocks. This ensures that all memory accesses are complete prior to unlocking. The ldcw instruction performs the same function on entry. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/barrier.h | 32 +++++++++++++++++++++++++++++++ arch/parisc/kernel/entry.S | 2 ++ arch/parisc/kernel/pacache.S | 1 + arch/parisc/kernel/syscall.S | 4 ++++ 4 files changed, 39 insertions(+) create mode 100644 arch/parisc/include/asm/barrier.h diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h new file mode 100644 index 000000000000..dbaaca84f27f --- /dev/null +++ b/arch/parisc/include/asm/barrier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifndef __ASSEMBLY__ + +/* The synchronize caches instruction executes as a nop on systems in + which all memory references are performed in order. */ +#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory") + +#if defined(CONFIG_SMP) +#define mb() do { synchronize_caches(); } while (0) +#define rmb() mb() +#define wmb() mb() +#define dma_rmb() mb() +#define dma_wmb() mb() +#else +#define mb() barrier() +#define rmb() barrier() +#define wmb() barrier() +#define dma_rmb() barrier() +#define dma_wmb() barrier() +#endif + +#define __smp_mb() mb() +#define __smp_rmb() mb() +#define __smp_wmb() mb() + +#include + +#endif /* !__ASSEMBLY__ */ +#endif /* __ASM_BARRIER_H */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 5dc831955de5..13cb2461fef5 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -481,6 +481,8 @@ /* Release pa_tlb_lock lock without reloading lock address. */ .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + sync or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 16073f472118..b3434a7fd3c9 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -354,6 +354,7 @@ ENDPROC(flush_data_cache_local) .macro tlb_unlock la,flags,tmp #ifdef CONFIG_SMP ldi 1,\tmp + sync stw \tmp,0(\la) mtsm \flags #endif diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 9f22195b90ed..f68eedc72484 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -631,6 +631,7 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw,ma %r24, 0(%r26) /* Free lock */ + sync stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ @@ -645,6 +646,7 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ + sync stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) @@ -846,6 +848,7 @@ cas2_action: cas2_end: /* Free lock */ + sync stw,ma %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 @@ -856,6 +859,7 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ + sync stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 From 6b1f6243b39c4f49d44bafa9e4639be4f124577f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Aug 2018 16:42:16 +0200 Subject: [PATCH 275/519] xen/netfront: don't cache skb_shinfo() commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream. skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache its return value. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Wei Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index bec9f099573b..68d0a5c9d437 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -879,7 +879,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, struct sk_buff *skb, struct sk_buff_head *list) { - struct skb_shared_info *shinfo = skb_shinfo(skb); RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *nskb; @@ -888,15 +887,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, RING_GET_RESPONSE(&queue->rx, ++cons); skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; - if (shinfo->nr_frags == MAX_SKB_FRAGS) { + if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; BUG_ON(pull_to <= skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } - BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS); + BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); - skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag), + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + skb_frag_page(nfrag), rx->offset, rx->status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; From 277131baccf9c96e01d5ffdb0c6447770b634eae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Apr 2018 14:10:24 +0200 Subject: [PATCH 276/519] ACPI / LPSS: Add missing prv_offset setting for byt/cht PWM devices commit fdcb613d49321b5bf5d5a1bd0fba8e7c241dcc70 upstream. The LPSS PWM device on on Bay Trail and Cherry Trail devices has a set of private registers at offset 0x800, the current lpss_device_desc for them already sets the LPSS_SAVE_CTX flag to have these saved/restored over device-suspend, but the current lpss_device_desc was not setting the prv_offset field, leading to the regular device registers getting saved/restored instead. This is causing the PWM controller to no longer work, resulting in a black screen, after a suspend/resume on systems where the firmware clears the APB clock and reset bits at offset 0x804. This commit fixes this by properly setting prv_offset to 0x800 for the PWM devices. Cc: stable@vger.kernel.org Fixes: e1c748179754 ("ACPI / LPSS: Add Intel BayTrail ACPI mode PWM") Fixes: 1bfbd8eb8a7f ("ACPI / LPSS: Add ACPI IDs for Intel Braswell") Signed-off-by: Hans de Goede Acked-by: Rafael J . Wysocki Signed-off-by: Thierry Reding Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index f9e0d09f7c66..8a0f77fb5181 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -154,10 +154,12 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = { static const struct lpss_device_desc byt_pwm_dev_desc = { .flags = LPSS_SAVE_CTX, + .prv_offset = 0x800, }; static const struct lpss_device_desc bsw_pwm_dev_desc = { .flags = LPSS_SAVE_CTX | LPSS_NO_D3_DELAY, + .prv_offset = 0x800, }; static const struct lpss_device_desc byt_uart_dev_desc = { From 6aef4c4a1690b0b371d88babc41a8a314d0fd3f9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:44:42 -0700 Subject: [PATCH 277/519] scsi: sr: Avoid that opening a CD-ROM hangs with runtime power management enabled commit 1214fd7b497400d200e3f4e64e2338b303a20949 upstream. Surround scsi_execute() calls with scsi_autopm_get_device() and scsi_autopm_put_device(). Note: removing sr_mutex protection from the scsi_cd_get() and scsi_cd_put() calls is safe because the purpose of sr_mutex is to serialize cdrom_*() calls. This patch avoids that complaints similar to the following appear in the kernel log if runtime power management is enabled: INFO: task systemd-udevd:650 blocked for more than 120 seconds. Not tainted 4.18.0-rc7-dbg+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. systemd-udevd D28176 650 513 0x00000104 Call Trace: __schedule+0x444/0xfe0 schedule+0x4e/0xe0 schedule_preempt_disabled+0x18/0x30 __mutex_lock+0x41c/0xc70 mutex_lock_nested+0x1b/0x20 __blkdev_get+0x106/0x970 blkdev_get+0x22c/0x5a0 blkdev_open+0xe9/0x100 do_dentry_open.isra.19+0x33e/0x570 vfs_open+0x7c/0xd0 path_openat+0x6e3/0x1120 do_filp_open+0x11c/0x1c0 do_sys_open+0x208/0x2d0 __x64_sys_openat+0x59/0x70 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Bart Van Assche Cc: Maurizio Lombardi Cc: Johannes Thumshirn Cc: Alan Stern Cc: Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sr.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index de53c9694b68..5dc288fecace 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -520,18 +520,26 @@ static int sr_init_command(struct scsi_cmnd *SCpnt) static int sr_block_open(struct block_device *bdev, fmode_t mode) { struct scsi_cd *cd; + struct scsi_device *sdev; int ret = -ENXIO; + cd = scsi_cd_get(bdev->bd_disk); + if (!cd) + goto out; + + sdev = cd->device; + scsi_autopm_get_device(sdev); check_disk_change(bdev); mutex_lock(&sr_mutex); - cd = scsi_cd_get(bdev->bd_disk); - if (cd) { - ret = cdrom_open(&cd->cdi, bdev, mode); - if (ret) - scsi_cd_put(cd); - } + ret = cdrom_open(&cd->cdi, bdev, mode); mutex_unlock(&sr_mutex); + + scsi_autopm_put_device(sdev); + if (ret) + scsi_cd_put(cd); + +out: return ret; } @@ -559,6 +567,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (ret) goto out; + scsi_autopm_get_device(sdev); + /* * Send SCSI addressing ioctls directly to mid level, send other * ioctls to cdrom/block level. @@ -567,15 +577,18 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case SCSI_IOCTL_GET_IDLUN: case SCSI_IOCTL_GET_BUS_NUMBER: ret = scsi_ioctl(sdev, cmd, argp); - goto out; + goto put; } ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg); if (ret != -ENOSYS) - goto out; + goto put; ret = scsi_ioctl(sdev, cmd, argp); +put: + scsi_autopm_put_device(sdev); + out: mutex_unlock(&sr_mutex); return ret; From ba744147871e7c6d3b6b60eede06f74a1a7abcd9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Aug 2018 09:03:58 -0400 Subject: [PATCH 278/519] root dentries need RCU-delayed freeing commit 90bad5e05bcdb0308cfa3d3a60f5c0b9c8e2efb3 upstream. Since mountpoint crossing can happen without leaving lazy mode, root dentries do need the same protection against having their memory freed without RCU delay as everything else in the tree. It's partially hidden by RCU delay between detaching from the mount tree and dropping the vfsmount reference, but the starting point of pathwalk can be on an already detached mount, in which case umount-caused RCU delay has already passed by the time the lazy pathwalk grabs rcu_read_lock(). If the starting point happens to be at the root of that vfsmount *and* that vfsmount covers the entire filesystem, we get trouble. Fixes: 48a066e72d97 ("RCU'd vsfmounts") Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 250c1222e30c..807efaab838e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1954,10 +1954,12 @@ struct dentry *d_make_root(struct inode *root_inode) static const struct qstr name = QSTR_INIT("/", 1); res = __d_alloc(root_inode->i_sb, &name); - if (res) + if (res) { + res->d_flags |= DCACHE_RCUACCESS; d_instantiate(res, root_inode); - else + } else { iput(root_inode); + } } return res; } From a3ababd599e72b9b92420c159564684fcbfa489f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Aug 2018 17:21:17 -0400 Subject: [PATCH 279/519] fix mntput/mntput race commit 9ea0a46ca2c318fcc449c1e6b62a7230a17888f1 upstream. mntput_no_expire() does the calculation of total refcount under mount_lock; unfortunately, the decrement (as well as all increments) are done outside of it, leading to false positives in the "are we dropping the last reference" test. Consider the following situation: * mnt is a lazy-umounted mount, kept alive by two opened files. One of those files gets closed. Total refcount of mnt is 2. On CPU 42 mntput(mnt) (called from __fput()) drops one reference, decrementing component * After it has looked at component #0, the process on CPU 0 does mntget(), incrementing component #0, gets preempted and gets to run again - on CPU 69. There it does mntput(), which drops the reference (component #69) and proceeds to spin on mount_lock. * On CPU 42 our first mntput() finishes counting. It observes the decrement of component #69, but not the increment of component #0. As the result, the total it gets is not 1 as it should've been - it's 0. At which point we decide that vfsmount needs to be killed and proceed to free it and shut the filesystem down. However, there's still another opened file on that filesystem, with reference to (now freed) vfsmount, etc. and we are screwed. It's not a wide race, but it can be reproduced with artificial slowdown of the mnt_get_count() loop, and it should be easier to hit on SMP KVM setups. Fix consists of moving the refcount decrement under mount_lock; the tricky part is that we want (and can) keep the fast case (i.e. mount that still has non-NULL ->mnt_ns) entirely out of mount_lock. All places that zero mnt->mnt_ns are dropping some reference to mnt and they call synchronize_rcu() before that mntput(). IOW, if mntput() observes (under rcu_read_lock()) a non-NULL ->mnt_ns, it is guaranteed that there is another reference yet to be dropped. Reported-by: Jann Horn Tested-by: Jann Horn Fixes: 48a066e72d97 ("RCU'd vsfmounts") Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index a879560ea144..643b7eecf7b7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1124,12 +1124,22 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); From b9341f5aebd89f46d2cda7dd9c39aabc0a559bdb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Aug 2018 17:51:32 -0400 Subject: [PATCH 280/519] fix __legitimize_mnt()/mntput() race commit 119e1ef80ecfe0d1deb6378d4ab41f5b71519de1 upstream. __legitimize_mnt() has two problems - one is that in case of success the check of mount_lock is not ordered wrt preceding increment of refcount, making it possible to have successful __legitimize_mnt() on one CPU just before the otherwise final mntpu() on another, with __legitimize_mnt() not seeing mntput() taking the lock and mntput() not seeing the increment done by __legitimize_mnt(). Solved by a pair of barriers. Another is that failure of __legitimize_mnt() on the second read_seqretry() leaves us with reference that'll need to be dropped by caller; however, if that races with final mntput() we can end up with caller dropping rcu_read_lock() and doing mntput() to release that reference - with the first mntput() having freed the damn thing just as rcu_read_lock() had been dropped. Solution: in "do mntput() yourself" failure case grab mount_lock, check if MNT_DOOMED has been set by racing final mntput() that has missed our increment and if it has - undo the increment and treat that as "failure, caller doesn't need to drop anything" case. It's not easy to hit - the final mntput() has to come right after the first read_seqretry() in __legitimize_mnt() *and* manage to miss the increment done by __legitimize_mnt() before the second read_seqretry() in there. The things that are almost impossible to hit on bare hardware are not impossible on SMP KVM, though... Reported-by: Oleg Nesterov Fixes: 48a066e72d97 ("RCU'd vsfmounts") Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 643b7eecf7b7..b56b50e3da11 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -603,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -1139,6 +1148,11 @@ static void mntput_no_expire(struct mount *mnt) return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. + */ + smp_mb(); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); From 01b377d3f0d286d071f46c30586cb261c79559f7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 23 May 2018 15:30:30 +0300 Subject: [PATCH 281/519] IB/core: Make testing MR flags for writability a static inline function commit 08bb558ac11ab944e0539e78619d7b4c356278bd upstream. Make the MR writability flags check, which is performed in umem.c, a static inline function in file ib_verbs.h This allows the function to be used by low-level infiniband drivers. Cc: Signed-off-by: Jason Gunthorpe Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/umem.c | 11 +---------- include/rdma/ib_verbs.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 6790ebb366dd..98fd9a594841 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -122,16 +122,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->address = addr; umem->page_size = PAGE_SIZE; umem->pid = get_task_pid(current, PIDTYPE_PID); - /* - * We ask for writable memory if any of the following - * access flags are set. "Local write" and "remote write" - * obviously require write access. "Remote atomic" can do - * things like fetch and add, which will modify memory, and - * "MW bind" can change permissions by binding a window. - */ - umem->writable = !!(access & - (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); + umem->writable = ib_access_writable(access); if (access & IB_ACCESS_ON_DEMAND) { put_pid(umem->pid); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 120da1d7f57e..10fefb0dc640 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3007,6 +3007,20 @@ static inline int ib_check_mr_access(int flags) return 0; } +static inline bool ib_access_writable(int access_flags) +{ + /* + * We have writable memory backing the MR if any of the following + * access flags are set. "Local write" and "remote write" obviously + * require write access. "Remote atomic" can do things like fetch and + * add, which will modify memory, and "MW bind" can change permissions + * by binding a window. + */ + return access_flags & + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND); +} + /** * ib_check_mr_status: lightweight check of MR status. * This routine may provide status checks on a selected From d803aa2fe665f2dca0e46cefca982ad5c537ca7e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 23 May 2018 15:30:31 +0300 Subject: [PATCH 282/519] IB/mlx4: Mark user MR as writable if actual virtual memory is writable commit d8f9cc328c8888369880e2527e9186d745f2bbf6 upstream. To allow rereg_user_mr to modify the MR from read-only to writable without using get_user_pages again, we needed to define the initial MR as writable. However, this was originally done unconditionally, without taking into account the writability of the underlying virtual memory. As a result, any attempt to register a read-only MR over read-only virtual memory failed. To fix this, do not add the writable flag bit when the user virtual memory is not writable (e.g. const memory). However, when the underlying memory is NOT writable (and we therefore do not define the initial MR as writable), the IB core adds a "force writable" flag to its user-pages request. If this succeeds, the reg_user_mr caller gets a writable copy of the original pages. If the user-space caller then does a rereg_user_mr operation to enable writability, this will succeed. This should not be allowed, since the original virtual memory was not writable. Cc: Fixes: 9376932d0c26 ("IB/mlx4_ib: Add support for user MR re-registration") Signed-off-by: Jason Gunthorpe Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/mr.c | 50 +++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index ce87e9cc7eff..bf52e35dd506 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -130,6 +130,40 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, return err; } +static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start, + u64 length, u64 virt_addr, + int access_flags) +{ + /* + * Force registering the memory as writable if the underlying pages + * are writable. This is so rereg can change the access permissions + * from readable to writable without having to run through ib_umem_get + * again + */ + if (!ib_access_writable(access_flags)) { + struct vm_area_struct *vma; + + down_read(¤t->mm->mmap_sem); + /* + * FIXME: Ideally this would iterate over all the vmas that + * cover the memory, but for now it requires a single vma to + * entirely cover the MR to support RO mappings. + */ + vma = find_vma(current->mm, start); + if (vma && vma->vm_end >= start + length && + vma->vm_start <= start) { + if (vma->vm_flags & VM_WRITE) + access_flags |= IB_ACCESS_LOCAL_WRITE; + } else { + access_flags |= IB_ACCESS_LOCAL_WRITE; + } + + up_read(¤t->mm->mmap_sem); + } + + return ib_umem_get(context, start, length, access_flags, 0); +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -144,10 +178,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - /* Force registering the memory as writable. */ - /* Used for memory re-registeration. HCA protects the access */ - mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags | IB_ACCESS_LOCAL_WRITE, 0); + mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length, + virt_addr, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -214,6 +246,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, } if (flags & IB_MR_REREG_ACCESS) { + if (ib_access_writable(mr_access_flags) && !mmr->umem->writable) + return -EPERM; + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, convert_access(mr_access_flags)); @@ -227,10 +262,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); ib_umem_release(mmr->umem); - mmr->umem = ib_umem_get(mr->uobject->context, start, length, - mr_access_flags | - IB_ACCESS_LOCAL_WRITE, - 0); + mmr->umem = + mlx4_get_umem_mr(mr->uobject->context, start, length, + virt_addr, mr_access_flags); if (IS_ERR(mmr->umem)) { err = PTR_ERR(mmr->umem); /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ From 45c679be34ac44ad24bc7abf60193b9f43a83490 Mon Sep 17 00:00:00 2001 From: Michael Mera Date: Mon, 1 May 2017 15:41:16 +0900 Subject: [PATCH 283/519] IB/ocrdma: fix out of bounds access to local buffer commit 062d0f22a30c39840ea49b72cfcfc1aa4cc538fa upstream. In write to debugfs file 'resource_stats' the local buffer 'tmp_str' is written at index 'count-1' where 'count' is the size of the write, so potentially 0. This patch filters odd values for the write size/position to avoid this type of problem. Signed-off-by: Michael Mera Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 748b63b86cbc..40242ead096f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -643,7 +643,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp, struct ocrdma_stats *pstats = filp->private_data; struct ocrdma_dev *dev = pstats->dev; - if (count > 32) + if (*ppos != 0 || count == 0 || count > sizeof(tmp_str)) goto err; if (copy_from_user(tmp_str, buffer, count)) From 916a57896e00d4f92318c8ff5a7b8ca07e4e95a7 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 15 Jun 2018 09:41:29 +0200 Subject: [PATCH 284/519] ARM: dts: imx6sx: fix irq for pcie bridge commit 1bcfe0564044be578841744faea1c2f46adc8178 upstream. Use the correct IRQ line for the MSI controller in the PCIe host controller. Apparently a different IRQ line is used compared to other i.MX6 variants. Without this change MSI IRQs aren't properly propagated to the upstream interrupt controller. Signed-off-by: Oleksij Rempel Reviewed-by: Lucas Stach Fixes: b1d17f68e5c5 ("ARM: dts: imx: add initial imx6sx device tree source") Signed-off-by: Shawn Guo Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6sx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 167f77b3bd43..6963dff815dc 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -1250,7 +1250,7 @@ pcie: pcie@0x08000000 { /* non-prefetchable memory */ 0x82000000 0 0x08000000 0x08000000 0 0x00f00000>; num-lanes = <1>; - interrupts = ; + interrupts = ; clocks = <&clks IMX6SX_CLK_PCIE_REF_125M>, <&clks IMX6SX_CLK_PCIE_AXI>, <&clks IMX6SX_CLK_LVDS1_OUT>, From 8dbce8a2e9cfc8e026565d75f7cb950393d04159 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Aug 2018 16:41:39 +0200 Subject: [PATCH 285/519] x86/paravirt: Fix spectre-v2 mitigations for paravirt guests commit 5800dc5c19f34e6e03b5adab1282535cb102fafd upstream. Nadav reported that on guests we're failing to rewrite the indirect calls to CALLEE_SAVE paravirt functions. In particular the pv_queued_spin_unlock() call is left unpatched and that is all over the place. This obviously wrecks Spectre-v2 mitigation (for paravirt guests) which relies on not actually having indirect calls around. The reason is an incorrect clobber test in paravirt_patch_call(); this function rewrites an indirect call with a direct call to the _SAME_ function, there is no possible way the clobbers can be different because of this. Therefore remove this clobber check. Also put WARNs on the other patch failure case (not enough room for the instruction) which I've not seen trigger in my (limited) testing. Three live kernel image disassemblies for lock_sock_nested (as a small function that illustrates the problem nicely). PRE is the current situation for guests, POST is with this patch applied and NATIVE is with or without the patch for !guests. PRE: (gdb) disassemble lock_sock_nested Dump of assembler code for function lock_sock_nested: 0xffffffff817be970 <+0>: push %rbp 0xffffffff817be971 <+1>: mov %rdi,%rbp 0xffffffff817be974 <+4>: push %rbx 0xffffffff817be975 <+5>: lea 0x88(%rbp),%rbx 0xffffffff817be97c <+12>: callq 0xffffffff819f7160 <_cond_resched> 0xffffffff817be981 <+17>: mov %rbx,%rdi 0xffffffff817be984 <+20>: callq 0xffffffff819fbb00 <_raw_spin_lock_bh> 0xffffffff817be989 <+25>: mov 0x8c(%rbp),%eax 0xffffffff817be98f <+31>: test %eax,%eax 0xffffffff817be991 <+33>: jne 0xffffffff817be9ba 0xffffffff817be993 <+35>: movl $0x1,0x8c(%rbp) 0xffffffff817be99d <+45>: mov %rbx,%rdi 0xffffffff817be9a0 <+48>: callq *0xffffffff822299e8 0xffffffff817be9a7 <+55>: pop %rbx 0xffffffff817be9a8 <+56>: pop %rbp 0xffffffff817be9a9 <+57>: mov $0x200,%esi 0xffffffff817be9ae <+62>: mov $0xffffffff817be993,%rdi 0xffffffff817be9b5 <+69>: jmpq 0xffffffff81063ae0 <__local_bh_enable_ip> 0xffffffff817be9ba <+74>: mov %rbp,%rdi 0xffffffff817be9bd <+77>: callq 0xffffffff817be8c0 <__lock_sock> 0xffffffff817be9c2 <+82>: jmp 0xffffffff817be993 End of assembler dump. POST: (gdb) disassemble lock_sock_nested Dump of assembler code for function lock_sock_nested: 0xffffffff817be970 <+0>: push %rbp 0xffffffff817be971 <+1>: mov %rdi,%rbp 0xffffffff817be974 <+4>: push %rbx 0xffffffff817be975 <+5>: lea 0x88(%rbp),%rbx 0xffffffff817be97c <+12>: callq 0xffffffff819f7160 <_cond_resched> 0xffffffff817be981 <+17>: mov %rbx,%rdi 0xffffffff817be984 <+20>: callq 0xffffffff819fbb00 <_raw_spin_lock_bh> 0xffffffff817be989 <+25>: mov 0x8c(%rbp),%eax 0xffffffff817be98f <+31>: test %eax,%eax 0xffffffff817be991 <+33>: jne 0xffffffff817be9ba 0xffffffff817be993 <+35>: movl $0x1,0x8c(%rbp) 0xffffffff817be99d <+45>: mov %rbx,%rdi 0xffffffff817be9a0 <+48>: callq 0xffffffff810a0c20 <__raw_callee_save___pv_queued_spin_unlock> 0xffffffff817be9a5 <+53>: xchg %ax,%ax 0xffffffff817be9a7 <+55>: pop %rbx 0xffffffff817be9a8 <+56>: pop %rbp 0xffffffff817be9a9 <+57>: mov $0x200,%esi 0xffffffff817be9ae <+62>: mov $0xffffffff817be993,%rdi 0xffffffff817be9b5 <+69>: jmpq 0xffffffff81063aa0 <__local_bh_enable_ip> 0xffffffff817be9ba <+74>: mov %rbp,%rdi 0xffffffff817be9bd <+77>: callq 0xffffffff817be8c0 <__lock_sock> 0xffffffff817be9c2 <+82>: jmp 0xffffffff817be993 End of assembler dump. NATIVE: (gdb) disassemble lock_sock_nested Dump of assembler code for function lock_sock_nested: 0xffffffff817be970 <+0>: push %rbp 0xffffffff817be971 <+1>: mov %rdi,%rbp 0xffffffff817be974 <+4>: push %rbx 0xffffffff817be975 <+5>: lea 0x88(%rbp),%rbx 0xffffffff817be97c <+12>: callq 0xffffffff819f7160 <_cond_resched> 0xffffffff817be981 <+17>: mov %rbx,%rdi 0xffffffff817be984 <+20>: callq 0xffffffff819fbb00 <_raw_spin_lock_bh> 0xffffffff817be989 <+25>: mov 0x8c(%rbp),%eax 0xffffffff817be98f <+31>: test %eax,%eax 0xffffffff817be991 <+33>: jne 0xffffffff817be9ba 0xffffffff817be993 <+35>: movl $0x1,0x8c(%rbp) 0xffffffff817be99d <+45>: mov %rbx,%rdi 0xffffffff817be9a0 <+48>: movb $0x0,(%rdi) 0xffffffff817be9a3 <+51>: nopl 0x0(%rax) 0xffffffff817be9a7 <+55>: pop %rbx 0xffffffff817be9a8 <+56>: pop %rbp 0xffffffff817be9a9 <+57>: mov $0x200,%esi 0xffffffff817be9ae <+62>: mov $0xffffffff817be993,%rdi 0xffffffff817be9b5 <+69>: jmpq 0xffffffff81063ae0 <__local_bh_enable_ip> 0xffffffff817be9ba <+74>: mov %rbp,%rdi 0xffffffff817be9bd <+77>: callq 0xffffffff817be8c0 <__lock_sock> 0xffffffff817be9c2 <+82>: jmp 0xffffffff817be993 End of assembler dump. Fixes: 63f70270ccd9 ("[PATCH] i386: PARAVIRT: add common patching machinery") Fixes: 3010a0663fd9 ("x86/paravirt, objtool: Annotate indirect calls") Reported-by: Nadav Amit Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/paravirt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f534a0e3af53..632195b41688 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -97,10 +97,12 @@ unsigned paravirt_patch_call(void *insnbuf, struct branch *b = insnbuf; unsigned long delta = (unsigned long)target - (addr+5); - if (tgt_clobbers & ~site_clobbers) - return len; /* target would clobber too much for this site */ - if (len < 5) + if (len < 5) { +#ifdef CONFIG_RETPOLINE + WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); +#endif return len; /* call too long for patch site */ + } b->opcode = 0xe8; /* call */ b->delta = delta; @@ -115,8 +117,12 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, struct branch *b = insnbuf; unsigned long delta = (unsigned long)target - (addr+5); - if (len < 5) + if (len < 5) { +#ifdef CONFIG_RETPOLINE + WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); +#endif return len; /* call too long for patch site */ + } b->opcode = 0xe9; /* jmp */ b->delta = delta; From 7744abbe29a59db367f59b0c9890356732f25a3b Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 26 Jul 2018 13:14:55 +0200 Subject: [PATCH 286/519] x86/speculation: Protect against userspace-userspace spectreRSB commit fdf82a7856b32d905c39afc85e34364491e46346 upstream. The article "Spectre Returns! Speculation Attacks using the Return Stack Buffer" [1] describes two new (sub-)variants of spectrev2-like attacks, making use solely of the RSB contents even on CPUs that don't fallback to BTB on RSB underflow (Skylake+). Mitigate userspace-userspace attacks by always unconditionally filling RSB on context switch when the generic spectrev2 mitigation has been enabled. [1] https://arxiv.org/pdf/1807.07940.pdf Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Borislav Petkov Cc: David Woodhouse Cc: Peter Zijlstra Cc: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1807261308190.997@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 12a8867071f3..7688ce0b26c5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -309,23 +309,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -/* Check for Skylake-like CPUs (for RSB handling) */ -static bool __init is_skylake_era(void) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6) { - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: - return true; - } - } - return false; -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -386,22 +369,15 @@ static void __init spectre_v2_select_mitigation(void) pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP nor PTI are available, there is a risk of - * hitting userspace addresses in the RSB after a context switch - * from a shallow call stack to a deeper one. To prevent this fill - * the entire RSB, even when using IBRS. + * If spectre v2 protection has been enabled, unconditionally fill + * RSB during a context switch; this protects against two independent + * issues: * - * Skylake era CPUs have a separate issue with *underflow* of the - * RSB, when they will predict 'ret' targets from the generic BTB. - * The proper mitigation for this is IBRS. If IBRS is not supported - * or deactivated in favour of retpolines the RSB fill on context - * switch is required. + * - RSB underflow (and switch to BTB) on Skylake+ + * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs */ - if ((!boot_cpu_has(X86_FEATURE_KAISER) && - !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); - } + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { From 866234c373a0f34774d5dcb3886a6c982397bbc9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:37:03 +0900 Subject: [PATCH 287/519] kprobes/x86: Fix %p uses in error messages commit 0ea063306eecf300fcf06d2f5917474b580f666f upstream. Remove all %p uses in error messages in kprobes/x86. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Cc: Tobin C . Harding Cc: Will Deacon Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491902310.9916.13355297638917767319.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 1f5c47a49e35..c6f466d6cc57 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -393,7 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src) newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); - pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(&insn); @@ -609,8 +608,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, * Raise a BUG or we'll continue in an endless reentering loop * and eventually a stack overflow. */ - printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", - p->addr); + pr_err("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); default: From ec5aa64fec7206537442a2f3cb67decabad252f4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 3 Aug 2018 10:05:50 -0700 Subject: [PATCH 288/519] x86/irqflags: Provide a declaration for native_save_fl commit 208cbb32558907f68b3b2a081ca2337ac3744794 upstream. It was reported that the commit d0a8d9378d16 is causing users of gcc < 4.9 to observe -Werror=missing-prototypes errors. Indeed, it seems that: extern inline unsigned long native_save_fl(void) { return 0; } compiled with -Werror=missing-prototypes produces this warning in gcc < 4.9, but not gcc >= 4.9. Fixes: d0a8d9378d16 ("x86/paravirt: Make native_save_fl() extern inline"). Reported-by: David Laight Reported-by: Jean Delvare Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: jgross@suse.com Cc: kstewart@linuxfoundation.org Cc: gregkh@linuxfoundation.org Cc: boris.ostrovsky@oracle.com Cc: astrachan@google.com Cc: mka@chromium.org Cc: arnd@arndb.de Cc: tstellar@redhat.com Cc: sedat.dilek@gmail.com Cc: David.Laight@aculab.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180803170550.164688-1-ndesaulniers@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 0056bc945cd1..cb7f04981c6b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -8,6 +8,8 @@ * Interrupt control: */ +/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ +extern inline unsigned long native_save_fl(void); extern inline unsigned long native_save_fl(void) { unsigned long flags; From 90a231c63cc28d896ab353b027011a949e9884d3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:21 -0700 Subject: [PATCH 289/519] x86/speculation/l1tf: Increase 32bit PAE __PHYSICAL_PAGE_SHIFT commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU speculates on PTE entries which do not have the PRESENT bit set, if the content of the resulting physical address is available in the L1D cache. The OS side mitigation makes sure that a !PRESENT PTE entry points to a physical address outside the actually existing and cachable memory space. This is achieved by inverting the upper bits of the PTE. Due to the address space limitations this only works for 64bit and 32bit PAE kernels, but not for 32bit non PAE. This mitigation applies to both host and guest kernels, but in case of a 64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of the PAE address space (44bit) is not enough if the host has more than 43 bits of populated memory address space, because the speculation treats the PTE content as a physical host address bypassing EPT. The host (hypervisor) protects itself against the guest by flushing L1D as needed, but pages inside the guest are not protected against attacks from other processes inside the same guest. For the guest the inverted PTE mask has to match the host to provide the full protection for all pages the host could possibly map into the guest. The hosts populated address space is not known to the guest, so the mask must cover the possible maximal host address space, i.e. 52 bit. On 32bit PAE the maximum PTE mask is currently set to 44 bit because that is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits the mask to be below what the host could possible use for physical pages. The L1TF PROT_NONE protection code uses the PTE masks to determine which bits to invert to make sure the higher bits are set for unmapped entries to prevent L1TF speculation attacks against EPT inside guests. In order to invert all bits that could be used by the host, increase __PHYSICAL_PAGE_SHIFT to 52 to match 64bit. The real limit for a 32bit PAE kernel is still 44 bits because all Linux PTEs are created from unsigned long PFNs, so they cannot be higher than 44 bits on a 32bit kernel. So these extra PFN bits should be never set. The only users of this macro are using it to look at PTEs, so it's safe. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/page_32_types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 3a52ee0e726d..bfceb5cc6347 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -27,8 +27,13 @@ #define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE -/* 44=32+12, the limit we can fit into an unsigned long pfn */ -#define __PHYSICAL_MASK_SHIFT 44 +/* + * This is beyond the 44 bit limit imposed by the 32bit long pfns, + * but we need the full mask to make sure inverted PROT_NONE + * entries have all the host bits set in a guest. + * The real limit is still 44 bits. + */ +#define __PHYSICAL_MASK_SHIFT 52 #define __VIRTUAL_MASK_SHIFT 32 #else /* !CONFIG_X86_PAE */ From 0a5deacaac102f451bf8c1fb9d007047fcd712f6 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:11 -0700 Subject: [PATCH 290/519] x86/mm: Move swap offset/type up in PTE to work around erratum commit 00839ee3b299303c6a5e26a0a2485427a3afcbbf upstream This erratum can result in Accessed/Dirty getting set by the hardware when we do not expect them to be (on !Present PTEs). Instead of trying to fix them up after this happens, we just allow the bits to get set and try to ignore them. We do this by shifting the layout of the bits we use for swap offset/type in our 64-bit PTEs. It looks like this: bitnrs: | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| names: | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| before: | OFFSET (9-63) |0|X|X| TYPE(1-5) |0| after: | OFFSET (14-63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| Note that D was already a don't care (X) even before. We just move TYPE up and turn its old spot (which could be hit by the A bit) into all don't cares. We take 5 bits away from the offset, but that still leaves us with 50 bits which lets us index into a 62-bit swapfile (4 EiB). I think that's probably fine for the moment. We could theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it doesn't gain us anything. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c810226e741a..225405b690b8 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -163,18 +163,32 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) ((void)(pte))/* NOP */ -/* Encode and de-code a swap entry */ +/* + * Encode and de-code a swap entry + * + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names + * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * + * G (8) is aliased and used as a PROT_NONE indicator for + * !present ptes. We need to start storing swap entries above + * there. We also need to avoid using A and D because of an + * erratum where they can be incorrectly set by hardware on + * non-present PTEs. + */ +#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 -#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +/* Place the offset above the type: */ +#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ +#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ - | ((offset) << SWP_OFFSET_SHIFT) }) + ((type) << (SWP_TYPE_FIRST_BIT)) \ + | ((offset) << SWP_OFFSET_FIRST_BIT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) From f487cf69cf1456ceb34857a50474373aae42dd8a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 10 Aug 2016 10:23:25 -0700 Subject: [PATCH 291/519] x86/mm: Fix swap entry comment and macro commit ace7fab7a6cdd363a615ec537f2aa94dbc761ee2 upstream A recent patch changed the format of a swap PTE. The comment explaining the format of the swap PTE is wrong about the bits used for the swap type field. Amusingly, the ASCII art and the patch description are correct, but the comment itself is wrong. As I was looking at this, I also noticed that the SWP_OFFSET_FIRST_BIT has an off-by-one error. This does not really hurt anything. It just wasted a bit of space in the PTE, giving us 2^59 bytes of addressable space in our swapfiles instead of 2^60. But, it doesn't match with the comments, and it wastes a bit of space, so fix it. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Fixes: 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work around erratum") Link: http://lkml.kernel.org/r/20160810172325.E56AD7DA@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 225405b690b8..ce97c8c6a310 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names - * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -179,7 +179,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 /* Place the offset above the type: */ -#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1) +#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) From 86b0948d7c546feb01cd2d7ac2bfb15476e6e974 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 8 Sep 2017 16:10:46 -0700 Subject: [PATCH 292/519] mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 1 commit eee4818baac0f2b37848fdf90e4b16430dc536ac upstream _PAGE_PSE is used to distinguish between a truly non-present (_PAGE_PRESENT=0) PMD, and a PMD which is undergoing a THP split and should be treated as present. But _PAGE_SWP_SOFT_DIRTY currently uses the _PAGE_PSE bit, which would cause confusion between one of those PMDs undergoing a THP split, and a soft-dirty PMD. Dropping _PAGE_PSE check in pmd_present() does not work well, because it can hurt optimization of tlb handling in thp split. Thus, we need to move the bit. In the current kernel, bits 1-4 are not used in non-present format since commit 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work around erratum"). So let's move _PAGE_SWP_SOFT_DIRTY to bit 1. Bit 7 is used as reserved (always clear), so please don't use it for other purpose. [dwmw2: Pulled in to 4.9 backport to support L1TF changes] Link: http://lkml.kernel.org/r/20170717193955.20207-3-zi.yan@sent.com Signed-off-by: Naoya Horiguchi Signed-off-by: Zi Yan Acked-by: Dave Hansen Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: David Nellans Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 12 +++++++++--- arch/x86/include/asm/pgtable_types.h | 10 +++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ce97c8c6a310..008e1a58f96c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -166,15 +166,21 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* * Encode and de-code a swap entry * - * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number - * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names - * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names + * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above * there. We also need to avoid using A and D because of an * erratum where they can be incorrectly set by hardware on * non-present PTEs. + * + * SD (1) in swp entry is used to store soft dirty bit, which helps us + * remember soft dirty over page migration + * + * Bit 7 in swp entry should be 0 because pmd_present checks not only P, + * but also L and G. */ #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8dba273da25a..7572ce32055e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -70,15 +70,15 @@ /* * Tracking soft dirty bit when a page goes to a swap is tricky. * We need a bit which can be stored in pte _and_ not conflict - * with swap entry format. On x86 bits 6 and 7 are *not* involved - * into swap entry computation, but bit 6 is used for nonlinear - * file mapping, so we borrow bit 7 for soft dirty tracking. + * with swap entry format. On x86 bits 1-4 are *not* involved + * into swap entry computation, but bit 7 is used for thp migration, + * so we borrow bit 1 for soft dirty tracking. * * Please note that this bit must be treated as swap dirty page - * mark if and only if the PTE has present bit clear! + * mark if and only if the PTE/PMD has present bit clear! */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE +#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW #else #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif From 614f5e84640e382b9916b6f606328191ed0264b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 13 Jun 2018 15:48:22 -0700 Subject: [PATCH 293/519] x86/speculation/l1tf: Change order of offset/type in swap entry commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream If pages are swapped out, the swap entry is stored in the corresponding PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate on PTE entries which have the present bit set and would treat the swap entry as phsyical address (PFN). To mitigate that the upper bits of the PTE must be set so the PTE points to non existent memory. The swap entry stores the type and the offset of a swapped out page in the PTE. type is stored in bit 9-13 and offset in bit 14-63. The hardware ignores the bits beyond the phsyical address space limit, so to make the mitigation effective its required to start 'offset' at the lowest possible bit so that even large swap offsets do not reach into the physical address space limit bits. Move offset to bit 9-58 and type to bit 59-63 which are the bits that hardware generally doesn't care about. That, in turn, means that if you on desktop chip with only 40 bits of physical addressing, now that the offset starts at bit 9, there needs to be 30 bits of offset actually *in use* until bit 39 ends up being set, which means when inverted it will again point into existing memory. So that's 4 terabyte of swap space (because the offset is counted in pages, so 30 bits of offset is 42 bits of actual coverage). With bigger physical addressing, that obviously grows further, until the limit of the offset is hit (at 50 bits of offset - 62 bits of actual swap file coverage). This is a preparatory change for the actual swap entry inversion to protect against L1TF. [ AK: Updated description and minor tweaks. Split into two parts ] [ tglx: Massaged changelog ] Signed-off-by: Linus Torvalds Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 008e1a58f96c..a72c2ab24006 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -182,19 +182,28 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. */ -#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) -#define SWP_TYPE_BITS 5 -/* Place the offset above the type: */ -#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS) +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ - & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) -#define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (SWP_TYPE_FIRST_BIT)) \ - | ((offset) << SWP_OFFSET_FIRST_BIT) }) +/* Extract the high bits for type */ +#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) + +/* Shift up (to get rid of type), then down to get value */ +#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) + +/* + * Shift the offset up "too far" by TYPE bits, then down again + */ +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) + #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) From 9bbdab847fc9a0b8cf23fa7354e1210f0b492821 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 13 Jun 2018 15:48:23 -0700 Subject: [PATCH 294/519] x86/speculation/l1tf: Protect swap entries against L1TF commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream With L1 terminal fault the CPU speculates into unmapped PTEs, and resulting side effects allow to read the memory the PTE is pointing too, if its values are still in the L1 cache. For swapped out pages Linux uses unmapped PTEs and stores a swap entry into them. To protect against L1TF it must be ensured that the swap entry is not pointing to valid memory, which requires setting higher bits (between bit 36 and bit 45) that are inside the CPUs physical address space, but outside any real memory. To do this invert the offset to make sure the higher bits are always set, as long as the swap file is not too big. Note there is no workaround for 32bit !PAE, or on systems which have more than MAX_PA/2 worth of memory. The later case is very unlikely to happen on real systems. [AK: updated description and minor tweaks by. Split out from the original patch ] Signed-off-by: Linus Torvalds Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a72c2ab24006..67f2fe43a593 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -181,6 +181,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. + * + * The offset is inverted by a binary not operation to make the high + * physical bits set. */ #define SWP_TYPE_BITS 5 @@ -195,13 +198,15 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) /* Shift up (to get rid of type), then down to get value */ -#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) +#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) /* * Shift the offset up "too far" by TYPE bits, then down again + * The offset is inverted by a binary not operation to make the high + * physical bits set. */ #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) From 9ee2d2da676c48a459a99f10f45c71ffca8761a8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:24 -0700 Subject: [PATCH 295/519] x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream When PTEs are set to PROT_NONE the kernel just clears the Present bit and preserves the PFN, which creates attack surface for L1TF speculation speculation attacks. This is important inside guests, because L1TF speculation bypasses physical page remapping. While the host has its own migitations preventing leaking data from other VMs into the guest, this would still risk leaking the wrong page inside the current guest. This uses the same technique as Linus' swap entry patch: while an entry is is in PROTNONE state invert the complete PFN part part of it. This ensures that the the highest bit will point to non existing memory. The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE and pte/pmd/pud_pfn undo it. This assume that no code path touches the PFN part of a PTE directly without using these primitives. This doesn't handle the case that MMIO is on the top of the CPU physical memory. If such an MMIO region was exposed by an unpriviledged driver for mmap it would be possible to attack some real memory. However this situation is all rather unlikely. For 32bit non PAE the inversion is not done because there are really not enough bits to protect anything. Q: Why does the guest need to be protected when the HyperVisor already has L1TF mitigations? A: Here's an example: Physical pages 1 2 get mapped into a guest as GPA 1 -> PA 2 GPA 2 -> PA 1 through EPT. The L1TF speculation ignores the EPT remapping. Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and they belong to different users and should be isolated. A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and gets read access to the underlying physical page. Which in this case points to PA 2, so it can read process B's data, if it happened to be in L1, so isolation inside the guest is broken. There's nothing the hypervisor can do about this. This mitigation has to be done in the guest itself. [ tglx: Massaged changelog ] [ dwmw2: backported to 4.9 ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-2level.h | 17 ++++++++++++ arch/x86/include/asm/pgtable-3level.h | 2 ++ arch/x86/include/asm/pgtable-invert.h | 32 ++++++++++++++++++++++ arch/x86/include/asm/pgtable.h | 38 +++++++++++++++++++-------- arch/x86/include/asm/pgtable_64.h | 2 ++ 5 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/pgtable-invert.h diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index fd74a11959de..89c50332a71e 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +/* No inverted PFNs on 2 level page tables */ + +static inline u64 protnone_mask(u64 val) +{ + return 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + return val; +} + +static inline bool __pte_needs_invert(u64 val) +{ + return false; +} + #endif /* _ASM_X86_PGTABLE_2LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index cdaa58c9b39e..0c89891c7b44 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -184,4 +184,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) +#include + #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h new file mode 100644 index 000000000000..177564187fc0 --- /dev/null +++ b/arch/x86/include/asm/pgtable-invert.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PGTABLE_INVERT_H +#define _ASM_PGTABLE_INVERT_H 1 + +#ifndef __ASSEMBLY__ + +static inline bool __pte_needs_invert(u64 val) +{ + return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE; +} + +/* Get a mask to xor with the page table entry to get the correct pfn. */ +static inline u64 protnone_mask(u64 val) +{ + return __pte_needs_invert(val) ? ~0ull : 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + /* + * When a PTE transitions from NONE to !NONE or vice-versa + * invert the PFN part to stop speculation. + * pte_pfn undoes this when needed. + */ + if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) + val = (val & ~mask) | (~val & mask); + return val; +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 84c62d950023..2ed1556d99b1 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -148,19 +148,29 @@ static inline int pte_special(pte_t pte) return pte_flags(pte) & _PAGE_SPECIAL; } +/* Entries that were set to PROT_NONE are inverted */ + +static inline u64 protnone_mask(u64 val); + static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long pfn = pte_val(pte); + pfn ^= protnone_mask(pfn); + return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; + unsigned long pfn = pmd_val(pmd); + pfn ^= protnone_mask(pfn); + return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { - return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT; + unsigned long pfn = pud_val(pud); + pfn ^= protnone_mask(pfn); + return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } #define pte_page(pte) pfn_to_page(pte_pfn(pte)) @@ -359,19 +369,25 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PTE_PFN_MASK; + return __pte(pfn | massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PMD_PAGE_MASK; + return __pmd(pfn | massage_pgprot(pgprot)); } +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pteval_t val = pte_val(pte); + pteval_t val = pte_val(pte), oldval = val; /* * Chop off the NX bit (if present), and add the NX portion of @@ -379,17 +395,17 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ val &= _PAGE_CHG_MASK; val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; - + val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); return __pte(val); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmdval_t val = pmd_val(pmd); + pmdval_t val = pmd_val(pmd), oldval = val; val &= _HPAGE_CHG_MASK; val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; - + val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); return __pmd(val); } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 67f2fe43a593..221a32ed1372 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -235,6 +235,8 @@ extern void cleanup_highmap(void); extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); +#include + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ From 52dc5c9f8eee1c569974308f0bb7be64ec63565c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:25 -0700 Subject: [PATCH 296/519] x86/speculation/l1tf: Make sure the first page is always reserved commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream The L1TF workaround doesn't make any attempt to mitigate speculate accesses to the first physical page for zeroed PTEs. Normally it only contains some data from the early real mode BIOS. It's not entirely clear that the first page is reserved in all configurations, so add an extra reservation call to make sure it is really reserved. In most configurations (e.g. with the standard reservations) it's likely a nop. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbaae4cf9e8e..31c4bc0d3372 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -851,6 +851,12 @@ void __init setup_arch(char **cmdline_p) memblock_reserve(__pa_symbol(_text), (unsigned long)__bss_stop - (unsigned long)_text); + /* + * Make sure page 0 is always reserved because on systems with + * L1TF its contents can be leaked to user processes. + */ + memblock_reserve(0, PAGE_SIZE); + early_reserve_initrd(); /* From bf0cca01b8736a5e146a980434ba36eb036e37ac Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:26 -0700 Subject: [PATCH 297/519] x86/speculation/l1tf: Add sysfs reporting for l1tf commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream L1TF core kernel workarounds are cheap and normally always enabled, However they still should be reported in sysfs if the system is vulnerable or mitigated. Add the necessary CPU feature/bug bits. - Extend the existing checks for Meltdowns to determine if the system is vulnerable. All CPUs which are not vulnerable to Meltdown are also not vulnerable to L1TF - Check for 32bit non PAE and emit a warning as there is no practical way for mitigation due to the limited physical address bits - If the system has more than MAX_PA/2 physical memory the invert page workarounds don't protect the system against the L1TF attack anymore, because an inverted physical address will also point to valid memory. Print a warning in this case and report that the system is vulnerable. Add a function which returns the PFN limit for the L1TF mitigation, which will be used in follow up patches for sanity and range checks. [ tglx: Renamed the CPU feature bit to L1TF_PTEINV ] [ dwmw2: Backport to 4.9 (cpufeatures.h, E820) ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 ++- arch/x86/include/asm/processor.h | 5 ++++ arch/x86/kernel/cpu/bugs.c | 40 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 20 +++++++++++++++ drivers/base/cpu.c | 8 ++++++ include/linux/cpu.h | 2 ++ 6 files changed, 77 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f4b175db70f4..f3a8479c0d87 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -214,7 +214,7 @@ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ - +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -331,5 +331,6 @@ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8e415cf65457..a3a53955f01c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -172,6 +172,11 @@ extern const struct seq_operations cpuinfo_op; extern void cpu_detect(struct cpuinfo_x86 *c); +static inline unsigned long l1tf_pfn_limit(void) +{ + return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; +} + extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7688ce0b26c5..ce8d0abf3d35 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -26,9 +26,11 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); +static void __init l1tf_select_mitigation(void); /* * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any @@ -80,6 +82,8 @@ void __init check_bugs(void) */ ssb_select_mitigation(); + l1tf_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -203,6 +207,32 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +static void __init l1tf_select_mitigation(void) +{ + u64 half_pa; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return; + +#if CONFIG_PGTABLE_LEVELS == 2 + pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); + return; +#endif + + /* + * This is extremely unlikely to happen because almost all + * systems have far more MAX_PA/2 than RAM can be fit into + * DIMM slots. + */ + half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; + if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) { + pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + return; + } + + setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); +} + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -655,6 +685,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + case X86_BUG_L1TF: + if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) + return sprintf(buf, "Mitigation: Page Table Inversion\n"); + break; + default: break; } @@ -681,4 +716,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * { return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d21b28f9826..4d3fa79c0f09 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -880,6 +880,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { {} }; +static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { + /* in addition to cpu_no_speculation */ + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + {} +}; + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; @@ -905,6 +920,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + + if (x86_match_cpu(cpu_no_l1tf)) + return; + + setup_force_cpu_bug(X86_BUG_L1TF); } /* diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 143edea1076f..41090ef5facb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -524,16 +524,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); +static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, &dev_attr_spec_store_bypass.attr, + &dev_attr_l1tf.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2f9d12022100..063c73ed6d78 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -48,6 +48,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 0371d9c4c822fceb290a0b4cd21119534f7bae47 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 29 Dec 2015 20:12:20 -0800 Subject: [PATCH 298/519] mm: Add vm_insert_pfn_prot() commit 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 upstream The x86 vvar vma contains pages with differing cacheability flags. x86 currently implements this by manually inserting all the ptes using (io_)remap_pfn_range when the vma is set up. x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up the mappings as needed. The correct API to use to insert a pfn in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the vma's cache mode, and the HPET page in particular needs to be uncached despite the fact that the rest of the VMA is cached. Add vm_insert_pfn_prot() to support varying cacheability within the same non-COW VMA in a more sane manner. x86 could alternatively use multiple VMAs, but that's messy, would break CRIU, and would create unnecessary VMAs that would waste memory. Signed-off-by: Andy Lutomirski Reviewed-by: Kees Cook Acked-by: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 2 ++ mm/memory.c | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index a100946607a5..1f4366567e7d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2083,6 +2083,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); diff --git a/mm/memory.c b/mm/memory.c index 177cb7d111a9..edb5b0d8a4a0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1604,9 +1604,30 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, */ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) +{ + return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); +} +EXPORT_SYMBOL(vm_insert_pfn); + +/** + * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * @pgprot: pgprot flags for the inserted page + * + * This is exactly like vm_insert_pfn, except that it allows drivers to + * to override pgprot on a per-page basis. + * + * This only makes sense for IO mappings, and it makes no sense for + * cow mappings. In general, using multiple vmas is preferable; + * vm_insert_pfn_prot should only be used if using multiple VMAs is + * impractical. + */ +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) { int ret; - pgprot_t pgprot = vma->vm_page_prot; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like @@ -1628,7 +1649,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, return ret; } -EXPORT_SYMBOL(vm_insert_pfn); +EXPORT_SYMBOL(vm_insert_pfn_prot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) From 9ac0dc7d949db7afd4116d55fa4fcf6a66d820f0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Oct 2016 17:00:18 -0700 Subject: [PATCH 299/519] mm: fix cache mode tracking in vm_insert_mixed() commit 87744ab3832b83ba71b931f86f9cfdb000d07da5 upstream vm_insert_mixed() unlike vm_insert_pfn_prot() and vmf_insert_pfn_pmd(), fails to check the pgprot_t it uses for the mapping against the one recorded in the memtype tracking tree. Add the missing call to track_pfn_insert() to preclude cases where incompatible aliased mappings are established for a given physical address range. [groeck: Backport to v4.4.y] Link: http://lkml.kernel.org/r/147328717909.35069.14256589123570653697.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: David Airlie Cc: Matthew Wilcox Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index edb5b0d8a4a0..78efb8c7ee20 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1654,10 +1654,14 @@ EXPORT_SYMBOL(vm_insert_pfn_prot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { + pgprot_t pgprot = vma->vm_page_prot; + BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; + if (track_pfn_insert(vma, &pgprot, pfn)) + return -EINVAL; /* * If we don't have pte special, then we have to use the pfn_valid() @@ -1670,9 +1674,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, struct page *page; page = pfn_to_page(pfn); - return insert_page(vma, addr, page, vma->vm_page_prot); + return insert_page(vma, addr, page, pgprot); } - return insert_pfn(vma, addr, pfn, vma->vm_page_prot); + return insert_pfn(vma, addr, pfn, pgprot); } EXPORT_SYMBOL(vm_insert_mixed); From d71af2dbacb5611c1dcdc16fd1d343821d61bd5e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:27 -0700 Subject: [PATCH 300/519] x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings commit 42e4089c7890725fcd329999252dc489b72f2921 upstream For L1TF PROT_NONE mappings are protected by inverting the PFN in the page table entry. This sets the high bits in the CPU's address space, thus making sure to point to not point an unmapped entry to valid cached memory. Some server system BIOSes put the MMIO mappings high up in the physical address space. If such an high mapping was mapped to unprivileged users they could attack low memory by setting such a mapping to PROT_NONE. This could happen through a special device driver which is not access protected. Normal /dev/mem is of course access protected. To avoid this forbid PROT_NONE mappings or mprotect for high MMIO mappings. Valid page mappings are allowed because the system is then unsafe anyways. It's not expected that users commonly use PROT_NONE on MMIO. But to minimize any impact this is only enforced if the mapping actually refers to a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip the check for root. For mmaps this is straight forward and can be handled in vm_insert_pfn and in remap_pfn_range(). For mprotect it's a bit trickier. At the point where the actual PTEs are accessed a lot of state has been changed and it would be difficult to undo on an error. Since this is a uncommon case use a separate early page talk walk pass for MMIO PROT_NONE mappings that checks for this condition early. For non MMIO and non PROT_NONE there are no changes. [dwmw2: Backport to 4.9] [groeck: Backport to 4.4] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 8 ++++++ arch/x86/mm/mmap.c | 21 +++++++++++++++ include/asm-generic/pgtable.h | 12 +++++++++ mm/memory.c | 29 +++++++++++++++----- mm/mprotect.c | 49 ++++++++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2ed1556d99b1..70e2248353cb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -942,6 +942,14 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) } #endif +#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 +extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); + +static inline bool arch_has_pfn_modify_check(void) +{ + return boot_cpu_has_bug(X86_BUG_L1TF); +} + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 307f60ecfc6d..9a055ea279eb 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -121,3 +121,24 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } + +/* + * Only allow root to set high MMIO mappings to PROT_NONE. + * This prevents an unpriv. user to set them to PROT_NONE and invert + * them, then pointing to valid memory for L1TF speculation. + * + * Note: for locked down kernels may want to disable the root override. + */ +bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) +{ + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return true; + if (!__pte_needs_invert(pgprot_val(prot))) + return true; + /* If it's real memory always allow */ + if (pfn_valid(pfn)) + return true; + if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) + return false; + return true; +} diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 25b793325b09..976f749099ef 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -805,4 +805,16 @@ static inline int pmd_free_pte_page(pmd_t *pmd) #define io_remap_pfn_range remap_pfn_range #endif +#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED +static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) +{ + return true; +} + +static inline bool arch_has_pfn_modify_check(void) +{ + return false; +} +#endif + #endif /* _ASM_GENERIC_PGTABLE_H */ diff --git a/mm/memory.c b/mm/memory.c index 78efb8c7ee20..d5bb1465d30c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1645,6 +1645,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, if (track_pfn_insert(vma, &pgprot, pfn)) return -EINVAL; + if (!pfn_modify_allowed(pfn, pgprot)) + return -EACCES; + ret = insert_pfn(vma, addr, pfn, pgprot); return ret; @@ -1663,6 +1666,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, if (track_pfn_insert(vma, &pgprot, pfn)) return -EINVAL; + if (!pfn_modify_allowed(pfn, pgprot)) + return -EACCES; + /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* @@ -1691,6 +1697,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; spinlock_t *ptl; + int err = 0; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) @@ -1698,12 +1705,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); + if (!pfn_modify_allowed(pfn, prot)) { + err = -EACCES; + break; + } set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); - return 0; + return err; } static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, @@ -1712,6 +1723,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, { pmd_t *pmd; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); @@ -1720,9 +1732,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); - if (remap_pte_range(mm, pmd, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pte_range(mm, pmd, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (pmd++, addr = next, addr != end); return 0; } @@ -1733,6 +1746,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, { pud_t *pud; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, pgd, addr); @@ -1740,9 +1754,10 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return -ENOMEM; do { next = pud_addr_end(addr, end); - if (remap_pmd_range(mm, pud, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pmd_range(mm, pud, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (pud++, addr = next, addr != end); return 0; } diff --git a/mm/mprotect.c b/mm/mprotect.c index c0b4b2a49462..a277f3412a5d 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -255,6 +255,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, return pages; } +static int prot_none_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? + 0 : -EACCES; +} + +static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long next, + struct mm_walk *walk) +{ + return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? + 0 : -EACCES; +} + +static int prot_none_test(unsigned long addr, unsigned long next, + struct mm_walk *walk) +{ + return 0; +} + +static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags) +{ + pgprot_t new_pgprot = vm_get_page_prot(newflags); + struct mm_walk prot_none_walk = { + .pte_entry = prot_none_pte_entry, + .hugetlb_entry = prot_none_hugetlb_entry, + .test_walk = prot_none_test, + .mm = current->mm, + .private = &new_pgprot, + }; + + return walk_page_range(start, end, &prot_none_walk); +} + int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags) @@ -272,6 +308,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, return 0; } + /* + * Do PROT_NONE PFN permission checks here when we can still + * bail out without undoing a lot of state. This is a rather + * uncommon case, so doesn't need to be very optimized. + */ + if (arch_has_pfn_modify_check() && + (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && + (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { + error = prot_none_walk(vma, start, end, newflags); + if (error) + return error; + } + /* * If we make a private mapping writable we increase our commit; * but (without finer accounting) cannot reduce our commit if we From 685b44483f077c949bd5016fdfe734b662b74aba Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:28 -0700 Subject: [PATCH 301/519] x86/speculation/l1tf: Limit swap file size to MAX_PA/2 commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream For the L1TF workaround its necessary to limit the swap file size to below MAX_PA/2, so that the higher bits of the swap offset inverted never point to valid memory. Add a mechanism for the architecture to override the swap file size check in swapfile.c and add a x86 specific max swapfile check function that enforces that limit. The check is only enabled if the CPU is vulnerable to L1TF. In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system with 46bit PA it is 32TB. The limit is only per individual swap file, so it's always possible to exceed these limits with multiple swap files or partitions. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Michal Hocko Acked-by: Dave Hansen Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 15 +++++++++++++ include/linux/swapfile.h | 2 ++ mm/swapfile.c | 46 ++++++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 151fd33e9043..afde6da2768f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -4,6 +4,8 @@ #include #include #include /* for max_low_pfn */ +#include +#include #include #include @@ -767,3 +769,16 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) __cachemode2pte_tbl[cache] = __cm_idx2pte(entry); __pte2cachemode_tbl[entry] = cache; } + +unsigned long max_swapfile_size(void) +{ + unsigned long pages; + + pages = generic_max_swapfile_size(); + + if (boot_cpu_has_bug(X86_BUG_L1TF)) { + /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ + pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages); + } + return pages; +} diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 388293a91e8c..e4594de79bc4 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -9,5 +9,7 @@ extern spinlock_t swap_lock; extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; extern int try_to_unuse(unsigned int, bool, unsigned long); +extern unsigned long generic_max_swapfile_size(void); +extern unsigned long max_swapfile_size(void); #endif /* _LINUX_SWAPFILE_H */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 674bf177ce44..8e25ff2b693a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2206,6 +2206,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) return 0; } + +/* + * Find out how many pages are allowed for a single swap device. There + * are two limiting factors: + * 1) the number of bits for the swap offset in the swp_entry_t type, and + * 2) the number of bits in the swap pte, as defined by the different + * architectures. + * + * In order to find the largest possible bit mask, a swap entry with + * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, + * decoded to a swp_entry_t again, and finally the swap offset is + * extracted. + * + * This will mask all the bits from the initial ~0UL mask that can't + * be encoded in either the swp_entry_t or the architecture definition + * of a swap pte. + */ +unsigned long generic_max_swapfile_size(void) +{ + return swp_offset(pte_to_swp_entry( + swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; +} + +/* Can be overridden by an architecture for additional checks. */ +__weak unsigned long max_swapfile_size(void) +{ + return generic_max_swapfile_size(); +} + static unsigned long read_swap_header(struct swap_info_struct *p, union swap_header *swap_header, struct inode *inode) @@ -2241,22 +2270,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p, p->cluster_next = 1; p->cluster_nr = 0; - /* - * Find out how many pages are allowed for a single swap - * device. There are two limiting factors: 1) the number - * of bits for the swap offset in the swp_entry_t type, and - * 2) the number of bits in the swap pte as defined by the - * different architectures. In order to find the - * largest possible bit mask, a swap entry with swap type 0 - * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again, and finally the swap - * offset is extracted. This will mask all the bits from - * the initial ~0UL mask that can't be encoded in either - * the swp_entry_t or the architecture definition of a - * swap pte. - */ - maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; + maxpages = max_swapfile_size(); last_page = swap_header->info.last_page; if (!last_page) { pr_warn("Empty swap-file\n"); From fa86c208d22d8179ef3d295f6084fc87390c8366 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 16:42:57 -0400 Subject: [PATCH 302/519] x86/bugs: Move the l1tf function and define pr_fmt properly commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt which was defined as "Spectre V2 : ". Move the function to be past SSBD and also define the pr_fmt. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ce8d0abf3d35..34e4aaaf03d2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -207,32 +207,6 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } -static void __init l1tf_select_mitigation(void) -{ - u64 half_pa; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) - return; - -#if CONFIG_PGTABLE_LEVELS == 2 - pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); - return; -#endif - - /* - * This is extremely unlikely to happen because almost all - * systems have far more MAX_PA/2 than RAM can be fit into - * DIMM slots. - */ - half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; - if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) { - pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); - return; - } - - setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); -} - #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -658,6 +632,35 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +#undef pr_fmt +#define pr_fmt(fmt) "L1TF: " fmt +static void __init l1tf_select_mitigation(void) +{ + u64 half_pa; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return; + +#if CONFIG_PGTABLE_LEVELS == 2 + pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); + return; +#endif + + /* + * This is extremely unlikely to happen because almost all + * systems have far more MAX_PA/2 than RAM can be fit into + * DIMM slots. + */ + half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; + if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) { + pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + return; + } + + setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); +} +#undef pr_fmt + #ifdef CONFIG_SYSFS static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, From df7fd6ccb358bd4aa3abc8a6ff995b1f3da1b0fb Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Jun 2018 12:36:29 +0200 Subject: [PATCH 303/519] x86/speculation/l1tf: Extend 64bit swap file size limit commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream The previous patch has limited swap file size so that large offsets cannot clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation. It assumed that offsets are encoded starting with bit 12, same as pfn. But on x86_64, offsets are encoded starting with bit 9. Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA and 256TB with 46bit MAX_PA. Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index afde6da2768f..8de904926d7f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -778,7 +778,15 @@ unsigned long max_swapfile_size(void) if (boot_cpu_has_bug(X86_BUG_L1TF)) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ - pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages); + unsigned long l1tf_limit = l1tf_pfn_limit() + 1; + /* + * We encode swap offsets also with 3 bits below those for pfn + * which makes the usable limit higher. + */ +#ifdef CONFIG_X86_64 + l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; +#endif + pages = min_t(unsigned long, l1tf_limit, pages); } return pages; } From dc48c1a2f45b628d3128ad4bb31d1bcd342c059d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 16:42:58 -0400 Subject: [PATCH 304/519] x86/cpufeatures: Add detection of L1D cache flush support. commit 11e34e64e4103955fc4568750914c75d65ea87ee upstream 336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR (IA32_FLUSH_CMD) which is detected by CPUID.7.EDX[28]=1 bit being set. This new MSR "gives software a way to invalidate structures with finer granularity than other architectual methods like WBINVD." A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199511 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f3a8479c0d87..123a7105db1b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -310,6 +310,7 @@ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ From b55b06bd3b3c977da2c938d1a73d38674cb88086 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 22 Jun 2018 17:39:33 +0200 Subject: [PATCH 305/519] x86/speculation/l1tf: Protect PAE swap entries against L1TF commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for offset. The lower word is zeroed, thus systems with less than 4GB memory are safe. With 4GB to 128GB the swap type selects the memory locations vulnerable to L1TF; with even more memory, also the swap offfset influences the address. This might be a problem with 32bit PAE guests running on large 64bit hosts. By continuing to keep the whole swap entry in either high or low 32bit word of PTE we would limit the swap size too much. Thus this patch uses the whole PAE PTE with the same layout as the 64bit version does. The macros just become a bit tricky since they assume the arch-dependent swp_entry_t to be 32bit. Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Acked-by: Michal Hocko Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-3level.h | 35 +++++++++++++++++++++++++-- arch/x86/mm/init.c | 2 +- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 0c89891c7b44..5c686382d84b 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -177,12 +177,43 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) #endif /* Encode and de-code a swap entry */ +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) + #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) #define __swp_offset(x) ((x).val >> 5) #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) -#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) -#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) + +/* + * Normally, __swp_entry() converts from arch-independent swp_entry_t to + * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result + * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the + * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to + * __swp_entry_to_pte() through the following helper macro based on 64bit + * __swp_entry(). + */ +#define __swp_pteval_entry(type, offset) ((pteval_t) { \ + (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) }) + +#define __swp_entry_to_pte(x) ((pte_t){ .pte = \ + __swp_pteval_entry(__swp_type(x), __swp_offset(x)) }) +/* + * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent + * swp_entry_t, but also has to convert it from 64bit to the 32bit + * intermediate representation, using the following macros based on 64bit + * __swp_type() and __swp_offset(). + */ +#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS))) +#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)) + +#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ + __pteval_swp_offset(pte))) #include diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8de904926d7f..3a8e9abe0667 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -783,7 +783,7 @@ unsigned long max_swapfile_size(void) * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. */ -#ifdef CONFIG_X86_64 +#if CONFIG_PGTABLE_LEVELS > 2 l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; #endif pages = min_t(unsigned long, l1tf_limit, pages); From 09049f022a9b96b0d09d90023d4f0a097a61a767 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 27 Jun 2018 17:46:50 +0200 Subject: [PATCH 306/519] x86/speculation/l1tf: Fix up pte->pfn conversion for PAE commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream Jan has noticed that pte_pfn and co. resp. pfn_pte are incorrect for CONFIG_PAE because phys_addr_t is wider than unsigned long and so the pte_val reps. shift left would get truncated. Fix this up by using proper types. [dwmw2: Backport to 4.9] Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation") Reported-by: Jan Beulich Signed-off-by: Michal Hocko Signed-off-by: Thomas Gleixner Acked-by: Vlastimil Babka Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 70e2248353cb..16c6886a1ece 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -154,21 +154,21 @@ static inline u64 protnone_mask(u64 val); static inline unsigned long pte_pfn(pte_t pte) { - unsigned long pfn = pte_val(pte); + phys_addr_t pfn = pte_val(pte); pfn ^= protnone_mask(pfn); return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { - unsigned long pfn = pmd_val(pmd); + phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { - unsigned long pfn = pud_val(pud); + phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } @@ -369,7 +369,7 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - phys_addr_t pfn = page_nr << PAGE_SHIFT; + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | massage_pgprot(pgprot)); @@ -377,7 +377,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - phys_addr_t pfn = page_nr << PAGE_SHIFT; + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PMD_PAGE_MASK; return __pmd(pfn | massage_pgprot(pgprot)); From 0aae5fe8413dfcd949d0df1c7d6b835efecd5b3b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:36 -0700 Subject: [PATCH 307/519] x86/speculation/l1tf: Invert all not present mappings commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream For kernel mappings PAGE_PROTNONE is not necessarily set for a non present mapping, but the inversion logic explicitely checks for !PRESENT and PROT_NONE. Remove the PROT_NONE check and make the inversion unconditional for all not present mappings. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-invert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index 177564187fc0..44b1203ece12 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -6,7 +6,7 @@ static inline bool __pte_needs_invert(u64 val) { - return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE; + return !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ From 9feecdb6cb73feaa55b0135aee8777eaac848c78 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:37 -0700 Subject: [PATCH 308/519] x86/speculation/l1tf: Make pmd/pud_mknotpresent() invert commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream Some cases in THP like: - MADV_FREE - mprotect - split mark the PMD non present for temporarily to prevent races. The window for an L1TF attack in these contexts is very small, but it wants to be fixed for correctness sake. Use the proper low level functions for pmd/pud_mknotpresent() to address this. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 16c6886a1ece..b5e157c065ae 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -315,11 +315,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_RW); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE); -} - #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { @@ -383,6 +378,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) return __pmd(pfn | massage_pgprot(pgprot)); } +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return pfn_pmd(pmd_pfn(pmd), + __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); +} + static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) From 02ff2769edbce2261e981effbc3c4b98fae4faf0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:39 -0700 Subject: [PATCH 309/519] x86/mm/pat: Make set_memory_np() L1TF safe commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream set_memory_np() is used to mark kernel mappings not present, but it has it's own open coded mechanism which does not have the L1TF protection of inverting the address bits. Replace the open coded PTE manipulation with the L1TF protecting low level PTE routines. Passes the CPA self test. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner [ dwmw2: Pull in pud_mkhuge() from commit a00cc7d9dd, and pfn_pud() ] Signed-off-by: David Woodhouse [groeck: port to 4.4] Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 27 +++++++++++++++++++++++++++ arch/x86/mm/pageattr.c | 8 ++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b5e157c065ae..4de6c282c02a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -378,12 +378,39 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) return __pmd(pfn | massage_pgprot(pgprot)); } +static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) +{ + phys_addr_t pfn = page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PUD_PAGE_MASK; + return __pud(pfn | massage_pgprot(pgprot)); +} + static inline pmd_t pmd_mknotpresent(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } +static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +{ + pudval_t v = native_pud_val(pud); + + return __pud(v | set); +} + +static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +{ + pudval_t v = native_pud_val(pud); + + return __pud(v & ~clear); +} + +static inline pud_t pud_mkhuge(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_PSE); +} + static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 79377e2a7bcd..27610c2d1821 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1006,8 +1006,8 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | - massage_pgprot(pmd_pgprot))); + set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn, + canon_pgprot(pmd_pgprot)))); start += PMD_SIZE; cpa->pfn += PMD_SIZE; @@ -1079,8 +1079,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | - massage_pgprot(pud_pgprot))); + set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn, + canon_pgprot(pud_pgprot)))); start += PUD_SIZE; cpa->pfn += PUD_SIZE; From 6b06f36f07e2c91ad0126f17d0fc8f933c827da8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Aug 2018 15:09:38 -0700 Subject: [PATCH 310/519] x86/mm/kmmio: Make the tracer robust against L1TF commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream The mmio tracer sets io mapping PTEs and PMDs to non present when enabled without inverting the address bits, which makes the PTE entry vulnerable for L1TF. Make it use the right low level macros to actually invert the address bits to protect against L1TF. In principle this could be avoided because MMIO tracing is not likely to be enabled on production machines, but the fix is straigt forward and for consistency sake it's better to get rid of the open coded PTE manipulation. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kmmio.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 76604c8a2a48..7bf14e74fc8f 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -125,24 +125,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) { + pmd_t new_pmd; pmdval_t v = pmd_val(*pmd); if (clear) { - *old = v & _PAGE_PRESENT; - v &= ~_PAGE_PRESENT; - } else /* presume this has been called with clear==true previously */ - v |= *old; - set_pmd(pmd, __pmd(v)); + *old = v; + new_pmd = pmd_mknotpresent(*pmd); + } else { + /* Presume this has been called with clear==true previously */ + new_pmd = __pmd(*old); + } + set_pmd(pmd, new_pmd); } static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) { pteval_t v = pte_val(*pte); if (clear) { - *old = v & _PAGE_PRESENT; - v &= ~_PAGE_PRESENT; - } else /* presume this has been called with clear==true previously */ - v |= *old; - set_pte_atomic(pte, __pte(v)); + *old = v; + /* Nothing should care about address */ + pte_clear(&init_mm, 0, pte); + } else { + /* Presume this has been called with clear==true previously */ + set_pte_atomic(pte, __pte(*old)); + } } static int clear_page_presence(struct kmmio_fault_page *f, bool clear) From eb993211b9d7856a9ab8c487c701c84103842713 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 13 Aug 2018 10:15:16 -0700 Subject: [PATCH 311/519] x86/speculation/l1tf: Fix up CPU feature flags In linux-4.4.y, the definition of X86_FEATURE_RETPOLINE and X86_FEATURE_RETPOLINE_AMD is different from the upstream definition. Result is an overlap with the newly introduced X86_FEATURE_L1TF_PTEINV. Update RETPOLINE definitions to match upstream definitions to improve alignment with upstream code. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 123a7105db1b..dd2269dcbc47 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -193,12 +193,12 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */ - #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ From 4b90ff885c6cc88795b678414aaf5d7b0153a5dc Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 14 Aug 2018 20:50:47 +0200 Subject: [PATCH 312/519] x86/init: fix build with CONFIG_SWAP=n commit 792adb90fa724ce07c0171cbc96b9215af4b1045 upstream. The introduction of generic_max_swapfile_size and arch-specific versions has broken linking on x86 with CONFIG_SWAP=n due to undefined reference to 'generic_max_swapfile_size'. Fix it by compiling the x86-specific max_swapfile_size() only with CONFIG_SWAP=y. Reported-by: Tomas Pruzina Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Signed-off-by: Vlastimil Babka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3a8e9abe0667..4954a6cef50a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -770,6 +770,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) __pte2cachemode_tbl[entry] = cache; } +#ifdef CONFIG_SWAP unsigned long max_swapfile_size(void) { unsigned long pages; @@ -790,3 +791,4 @@ unsigned long max_swapfile_size(void) } return pages; } +#endif From 8f2adf3d2118cc0822b83a7bb43475f9149a1d26 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sat, 14 Jul 2018 21:56:13 +0200 Subject: [PATCH 313/519] x86/speculation/l1tf: Unbreak !__HAVE_ARCH_PFN_MODIFY_ALLOWED architectures commit 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 upstream. pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the !__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses assembler on archs that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED (e.g. ia64) and breaks build: include/asm-generic/pgtable.h: Assembler messages: include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)' include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true' include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)' include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false' arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle Move those two static inlines into the !__ASSEMBLY__ section so that they don't confuse the asm build pass. Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings") Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman [groeck: Context changes] Signed-off-by: Guenter Roeck --- include/asm-generic/pgtable.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 976f749099ef..dabecb661264 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -799,12 +799,6 @@ static inline int pmd_free_pte_page(pmd_t *pmd) } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ -#endif /* !__ASSEMBLY__ */ - -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) { @@ -815,6 +809,12 @@ static inline bool arch_has_pfn_modify_check(void) { return false; } +#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */ + +#endif /* !__ASSEMBLY__ */ + +#ifndef io_remap_pfn_range +#define io_remap_pfn_range remap_pfn_range #endif #endif /* _ASM_GENERIC_PGTABLE_H */ From 30a97c1e2dc39f45d9deeeccc2733278fc285d5e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 15 Aug 2018 17:42:11 +0200 Subject: [PATCH 314/519] Linux 4.4.148 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ee92a12e3a4b..9b795164122e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 147 +SUBLEVEL = 148 EXTRAVERSION = NAME = Blurry Fish Butt From 438604aa025a449e5c994ba4c824cc7267b8ccd2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Jun 2018 08:13:46 -0600 Subject: [PATCH 315/519] x86/mm: Disable ioremap free page handling on x86-PAE commit f967db0b9ed44ec3057a28f3b28efc51df51b835 upstream. ioremap() supports pmd mappings on x86-PAE. However, kernel's pmd tables are not shared among processes on x86-PAE. Therefore, any update to sync'd pmd entries need re-syncing. Freeing a pte page also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one(). Disable free page handling on x86-PAE. pud_free_pmd_page() and pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present. This assures that ioremap() does not update sync'd pmd entries at the cost of falling back to pte mappings. Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Reported-by: Joerg Roedel Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: cpandya@codeaurora.org Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-2-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pgtable.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 08e94b6139ab..8e0378a00d50 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -676,6 +676,7 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } +#ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. @@ -723,4 +724,22 @@ int pmd_free_pte_page(pmd_t *pmd) return 1; } + +#else /* !CONFIG_X86_64 */ + +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +/* + * Disable free page handling on x86-PAE. This assures that ioremap() + * does not update sync'd pmd entries. See vmalloc_sync_one(). + */ +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} + +#endif /* CONFIG_X86_64 */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ From 42962538cd9fe281a6e8602f22c7b1e218ed812a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 15 Aug 2018 11:58:46 +0200 Subject: [PATCH 316/519] tcp: Fix missing range_truesize enlargement in the backport The 4.4.y stable backport dc6ae4dffd65 for the upstream commit 3d4bf93ac120 ("tcp: detect malicious patterns in tcp_collapse_ofo_queue()") missed a line that enlarges the range_truesize value, which broke the whole check. Fixes: dc6ae4dffd65 ("tcp: detect malicious patterns in tcp_collapse_ofo_queue()") Signed-off-by: Takashi Iwai Cc: Michal Kubecek --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4a261e078082..9c4c6cd0316e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4835,6 +4835,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) end = TCP_SKB_CB(skb)->end_seq; range_truesize = skb->truesize; } else { + range_truesize += skb->truesize; if (before(TCP_SKB_CB(skb)->seq, start)) start = TCP_SKB_CB(skb)->seq; if (after(TCP_SKB_CB(skb)->end_seq, end)) From dcb852a7db98fb702878e811425063cd00e688b2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 6 Feb 2018 15:36:00 -0800 Subject: [PATCH 317/519] kasan: don't emit builtin calls when sanitization is off commit 0e410e158e5baa1300bdf678cea4f4e0cf9d8b94 upstream. With KASAN enabled the kernel has two different memset() functions, one with KASAN checks (memset) and one without (__memset). KASAN uses some macro tricks to use the proper version where required. For example memset() calls in mm/slub.c are without KASAN checks, since they operate on poisoned slab object metadata. The issue is that clang emits memset() calls even when there is no memset() in the source code. They get linked with improper memset() implementation and the kernel fails to boot due to a huge amount of KASAN reports during early boot stages. The solution is to add -fno-builtin flag for files with KASAN_SANITIZE := n marker. Link: http://lkml.kernel.org/r/8ffecfffe04088c52c42b92739c2bd8a0bcb3f5e.1516384594.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Nick Desaulniers Cc: Masahiro Yamada Cc: Michal Marek Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [ Nick : Backported to 4.4 avoiding KUBSAN ] Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 ++- scripts/Makefile.kasan | 3 +++ scripts/Makefile.lib | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 9b795164122e..cacb45c3173a 100644 --- a/Makefile +++ b/Makefile @@ -418,7 +418,8 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV +export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 37323b0df374..2624d4bf9a45 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -28,4 +28,7 @@ else CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) endif endif + +CFLAGS_KASAN_NOSANITIZE := -fno-builtin + endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 24914e7de944..a2d0e6d32659 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -126,7 +126,7 @@ endif ifeq ($(CONFIG_KASAN),y) _c_flags += $(if $(patsubst n%,, \ $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ - $(CFLAGS_KASAN)) + $(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE)) endif # If building the kernel in a separate objtree expand all occurrences From 3abc229cfc033612acf27a9efbe9a44ea9004592 Mon Sep 17 00:00:00 2001 From: Liwei Song Date: Tue, 13 Jun 2017 00:59:53 -0400 Subject: [PATCH 318/519] i2c: ismt: fix wrong device address when unmap the data buffer commit 17e83549e199d89aace7788a9f11c108671eecf5 upstream. Fix the following kernel bug: kernel BUG at drivers/iommu/intel-iommu.c:3260! invalid opcode: 0000 [#5] PREEMPT SMP Hardware name: Intel Corp. Harcuvar/Server, BIOS HAVLCRB0.X64.0013.D39.1608311820 08/31/2016 task: ffff880175389950 ti: ffff880176bec000 task.ti: ffff880176bec000 RIP: 0010:[] [] intel_unmap+0x25b/0x260 RSP: 0018:ffff880176bef5e8 EFLAGS: 00010296 RAX: 0000000000000024 RBX: ffff8800773c7c88 RCX: 000000000000ce04 RDX: 0000000080000000 RSI: 0000000000000000 RDI: 0000000000000009 RBP: ffff880176bef638 R08: 0000000000000010 R09: 0000000000000004 R10: ffff880175389c78 R11: 0000000000000a4f R12: ffff8800773c7868 R13: 00000000ffffac88 R14: ffff8800773c7818 R15: 0000000000000001 FS: 00007fef21258700(0000) GS:ffff88017b5c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000066d6d8 CR3: 000000007118c000 CR4: 00000000003406e0 Stack: 00000000ffffac88 ffffffff8199867f ffff880176bef5f8 ffff880100000030 ffff880176bef668 ffff8800773c7c88 ffff880178288098 ffff8800772c0010 ffff8800773c7818 0000000000000001 ffff880176bef648 ffffffff8150a86e Call Trace: [] ? printk+0x46/0x48 [] intel_unmap_page+0xe/0x10 [] ismt_access+0x27b/0x8fa [i2c_ismt] [] ? __pm_runtime_suspend+0xa0/0xa0 [] ? pm_suspend_timer_fn+0x80/0x80 [] ? __pm_runtime_suspend+0xa0/0xa0 [] ? pm_suspend_timer_fn+0x80/0x80 [] ? pci_bus_read_dev_vendor_id+0xf0/0xf0 [] i2c_smbus_xfer+0xec/0x4b0 [] ? vprintk_emit+0x345/0x530 [] i2cdev_ioctl_smbus+0x12b/0x240 [i2c_dev] [] ? vprintk_default+0x29/0x40 [] i2cdev_ioctl+0x63/0x1ec [i2c_dev] [] do_vfs_ioctl+0x328/0x5d0 [] ? vfs_write+0x11c/0x190 [] ? rt_up_read+0x19/0x20 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x6e This happen When run "i2cdetect -y 0" detect SMBus iSMT adapter. After finished I2C block read/write, when unmap the data buffer, a wrong device address was pass to dma_unmap_single(). To fix this, give dma_unmap_single() the "dev" parameter, just like what dma_map_single() does, then unmap can find the right devices. Fixes: 13f35ac14cd0 ("i2c: Adding support for Intel iSMT SMBus 2.0 host controller") Signed-off-by: Liwei Song Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-ismt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 1111cb966a44..fa2b58142cde 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -587,7 +587,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, /* unmap the data buffer */ if (dma_size != 0) - dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction); + dma_unmap_single(dev, dma_addr, dma_size, dma_direction); if (unlikely(!time_left)) { dev_err(dev, "completion wait timed out\n"); From 293cce718b133959f04b94148326334c9bfab5ad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 1 Jul 2018 19:46:06 -0700 Subject: [PATCH 319/519] kbuild: verify that $DEPMOD is installed commit 934193a654c1f4d0643ddbf4b2529b508cae926e upstream. Verify that 'depmod' ($DEPMOD) is installed. This is a partial revert of commit 620c231c7a7f ("kbuild: do not check for ancient modutils tools"). Also update Documentation/process/changes.rst to refer to kmod instead of module-init-tools. Fixes kernel bugzilla #198965: https://bugzilla.kernel.org/show_bug.cgi?id=198965 Signed-off-by: Randy Dunlap Cc: Lucas De Marchi Cc: Lucas De Marchi Cc: Michal Marek Cc: Jessica Yu Cc: Chih-Wei Huang Cc: stable@vger.kernel.org # any kernel since 2012 Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- Documentation/Changes | 17 ++++++----------- scripts/depmod.sh | 8 +++++++- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Documentation/Changes b/Documentation/Changes index ec97b77c8b00..f25649ffb892 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -25,7 +25,7 @@ o GNU C 3.2 # gcc --version o GNU make 3.80 # make --version o binutils 2.12 # ld -v o util-linux 2.10o # fdformat --version -o module-init-tools 0.9.10 # depmod -V +o kmod 13 # depmod -V o e2fsprogs 1.41.4 # e2fsck -V o jfsutils 1.1.3 # fsck.jfs -V o reiserfsprogs 3.6.3 # reiserfsck -V @@ -132,12 +132,6 @@ is not build with CONFIG_KALLSYMS and you have no way to rebuild and reproduce the Oops with that option, then you can still decode that Oops with ksymoops. -Module-Init-Tools ------------------ - -A new module loader is now in the kernel that requires module-init-tools -to use. It is backward compatible with the 2.4.x series kernels. - Mkinitrd -------- @@ -319,14 +313,15 @@ Util-linux ---------- o +Kmod +---- +o +o + Ksymoops -------- o -Module-Init-Tools ------------------ -o - Mkinitrd -------- o diff --git a/scripts/depmod.sh b/scripts/depmod.sh index 122599b1c13b..ea1e96921e3b 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -10,10 +10,16 @@ DEPMOD=$1 KERNELRELEASE=$2 SYMBOL_PREFIX=$3 -if ! test -r System.map -a -x "$DEPMOD"; then +if ! test -r System.map ; then exit 0 fi +if [ -z $(command -v $DEPMOD) ]; then + echo "'make modules_install' requires $DEPMOD. Please install it." >&2 + echo "This is probably in the kmod package." >&2 + exit 1 +fi + # older versions of depmod don't support -P # support was added in module-init-tools 3.13 if test -n "$SYMBOL_PREFIX"; then From 9054a54766f7cd3e33086c47701ad19eb1d0d404 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Jun 2018 10:22:37 -0700 Subject: [PATCH 320/519] crypto: vmac - require a block cipher with 128-bit block size commit 73bf20ef3df262026c3470241ae4ac8196943ffa upstream. The VMAC template assumes the block cipher has a 128-bit block size, but it failed to check for that. Thus it was possible to instantiate it using a 64-bit block size cipher, e.g. "vmac(cast5)", causing uninitialized memory to be used. Add the needed check when instantiating the template. Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support") Cc: # v2.6.32+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/vmac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/vmac.c b/crypto/vmac.c index df76a816cfb2..3034454a3713 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -655,6 +655,10 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (IS_ERR(alg)) return PTR_ERR(alg); + err = -EINVAL; + if (alg->cra_blocksize != 16) + goto out_put_alg; + inst = shash_alloc_instance("vmac", alg); err = PTR_ERR(inst); if (IS_ERR(inst)) From 335e988310f9bf17b94001945f0c6985e54c88b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Jun 2018 10:22:38 -0700 Subject: [PATCH 321/519] crypto: vmac - separate tfm and request context commit bb29648102335586e9a66289a1d98a0cb392b6e5 upstream. syzbot reported a crash in vmac_final() when multiple threads concurrently use the same "vmac(aes)" transform through AF_ALG. The bug is pretty fundamental: the VMAC template doesn't separate per-request state from per-tfm (per-key) state like the other hash algorithms do, but rather stores it all in the tfm context. That's wrong. Also, vmac_final() incorrectly zeroes most of the state including the derived keys and cached pseudorandom pad. Therefore, only the first VMAC invocation with a given key calculates the correct digest. Fix these bugs by splitting the per-tfm state from the per-request state and using the proper init/update/final sequencing for requests. Reproducer for the crash: #include #include #include int main() { int fd; struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "vmac(aes)", }; char buf[256] = { 0 }; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, buf, 16); fork(); fd = accept(fd, NULL, NULL); for (;;) write(fd, buf, 256); } The immediate cause of the crash is that vmac_ctx_t.partial_size exceeds VMAC_NHBYTES, causing vmac_final() to memset() a negative length. Reported-by: syzbot+264bca3a6e8d645550d3@syzkaller.appspotmail.com Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support") Cc: # v2.6.32+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/vmac.c | 428 +++++++++++++++++++----------------------- include/crypto/vmac.h | 63 ------- 2 files changed, 191 insertions(+), 300 deletions(-) delete mode 100644 include/crypto/vmac.h diff --git a/crypto/vmac.c b/crypto/vmac.c index 3034454a3713..bb2fc787d615 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -1,6 +1,10 @@ /* - * Modified to interface to the Linux kernel + * VMAC: Message Authentication Code using Universal Hashing + * + * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01 + * * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2018, Google Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,14 +20,15 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. */ -/* -------------------------------------------------------------------------- - * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. - * This implementation is herby placed in the public domain. - * The authors offers no warranty. Use at your own risk. - * Please send bug reports to the authors. - * Last modified: 17 APR 08, 1700 PDT - * ----------------------------------------------------------------------- */ +/* + * Derived from: + * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. + * This implementation is herby placed in the public domain. + * The authors offers no warranty. Use at your own risk. + * Last modified: 17 APR 08, 1700 PDT + */ +#include #include #include #include @@ -31,9 +36,35 @@ #include #include #include -#include #include +/* + * User definable settings. + */ +#define VMAC_TAG_LEN 64 +#define VMAC_KEY_SIZE 128/* Must be 128, 192 or 256 */ +#define VMAC_KEY_LEN (VMAC_KEY_SIZE/8) +#define VMAC_NHBYTES 128/* Must 2^i for any 3 < i < 13 Standard = 128*/ + +/* per-transform (per-key) context */ +struct vmac_tfm_ctx { + struct crypto_cipher *cipher; + u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; + u64 polykey[2*VMAC_TAG_LEN/64]; + u64 l3key[2*VMAC_TAG_LEN/64]; +}; + +/* per-request context */ +struct vmac_desc_ctx { + union { + u8 partial[VMAC_NHBYTES]; /* partial block */ + __le64 partial_words[VMAC_NHBYTES / 8]; + }; + unsigned int partial_size; /* size of the partial block */ + bool first_block_processed; + u64 polytmp[2*VMAC_TAG_LEN/64]; /* running total of L2-hash */ +}; + /* * Constants and masks */ @@ -318,13 +349,6 @@ static void poly_step_func(u64 *ahi, u64 *alo, } while (0) #endif -static void vhash_abort(struct vmac_ctx *ctx) -{ - ctx->polytmp[0] = ctx->polykey[0] ; - ctx->polytmp[1] = ctx->polykey[1] ; - ctx->first_block_processed = 0; -} - static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) { u64 rh, rl, t, z = 0; @@ -364,280 +388,209 @@ static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) return rl; } -static void vhash_update(const unsigned char *m, - unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */ - struct vmac_ctx *ctx) +/* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */ +static void vhash_blocks(const struct vmac_tfm_ctx *tctx, + struct vmac_desc_ctx *dctx, + const __le64 *mptr, unsigned int blocks) { - u64 rh, rl, *mptr; - const u64 *kptr = (u64 *)ctx->nhkey; - int i; - u64 ch, cl; - u64 pkh = ctx->polykey[0]; - u64 pkl = ctx->polykey[1]; + const u64 *kptr = tctx->nhkey; + const u64 pkh = tctx->polykey[0]; + const u64 pkl = tctx->polykey[1]; + u64 ch = dctx->polytmp[0]; + u64 cl = dctx->polytmp[1]; + u64 rh, rl; - if (!mbytes) - return; - - BUG_ON(mbytes % VMAC_NHBYTES); - - mptr = (u64 *)m; - i = mbytes / VMAC_NHBYTES; /* Must be non-zero */ - - ch = ctx->polytmp[0]; - cl = ctx->polytmp[1]; - - if (!ctx->first_block_processed) { - ctx->first_block_processed = 1; + if (!dctx->first_block_processed) { + dctx->first_block_processed = true; nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); rh &= m62; ADD128(ch, cl, rh, rl); mptr += (VMAC_NHBYTES/sizeof(u64)); - i--; + blocks--; } - while (i--) { + while (blocks--) { nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); rh &= m62; poly_step(ch, cl, pkh, pkl, rh, rl); mptr += (VMAC_NHBYTES/sizeof(u64)); } - ctx->polytmp[0] = ch; - ctx->polytmp[1] = cl; + dctx->polytmp[0] = ch; + dctx->polytmp[1] = cl; } -static u64 vhash(unsigned char m[], unsigned int mbytes, - u64 *tagl, struct vmac_ctx *ctx) +static int vmac_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) { - u64 rh, rl, *mptr; - const u64 *kptr = (u64 *)ctx->nhkey; - int i, remaining; - u64 ch, cl; - u64 pkh = ctx->polykey[0]; - u64 pkl = ctx->polykey[1]; + struct vmac_tfm_ctx *tctx = crypto_shash_ctx(tfm); + __be64 out[2]; + u8 in[16] = { 0 }; + unsigned int i; + int err; - mptr = (u64 *)m; - i = mbytes / VMAC_NHBYTES; - remaining = mbytes % VMAC_NHBYTES; - - if (ctx->first_block_processed) { - ch = ctx->polytmp[0]; - cl = ctx->polytmp[1]; - } else if (i) { - nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl); - ch &= m62; - ADD128(ch, cl, pkh, pkl); - mptr += (VMAC_NHBYTES/sizeof(u64)); - i--; - } else if (remaining) { - nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl); - ch &= m62; - ADD128(ch, cl, pkh, pkl); - mptr += (VMAC_NHBYTES/sizeof(u64)); - goto do_l3; - } else {/* Empty String */ - ch = pkh; cl = pkl; - goto do_l3; + if (keylen != VMAC_KEY_LEN) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; } - while (i--) { - nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); - rh &= m62; - poly_step(ch, cl, pkh, pkl, rh, rl); - mptr += (VMAC_NHBYTES/sizeof(u64)); - } - if (remaining) { - nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl); - rh &= m62; - poly_step(ch, cl, pkh, pkl, rh, rl); - } - -do_l3: - vhash_abort(ctx); - remaining *= 8; - return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining); -} - -static u64 vmac(unsigned char m[], unsigned int mbytes, - const unsigned char n[16], u64 *tagl, - struct vmac_ctx_t *ctx) -{ - u64 *in_n, *out_p; - u64 p, h; - int i; - - in_n = ctx->__vmac_ctx.cached_nonce; - out_p = ctx->__vmac_ctx.cached_aes; - - i = n[15] & 1; - if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) { - in_n[0] = *(u64 *)(n); - in_n[1] = *(u64 *)(n+8); - ((unsigned char *)in_n)[15] &= 0xFE; - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out_p, (unsigned char *)in_n); - - ((unsigned char *)in_n)[15] |= (unsigned char)(1-i); - } - p = be64_to_cpup(out_p + i); - h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx); - return le64_to_cpu(p + h); -} - -static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx) -{ - u64 in[2] = {0}, out[2]; - unsigned i; - int err = 0; - - err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN); + err = crypto_cipher_setkey(tctx->cipher, key, keylen); if (err) return err; /* Fill nh key */ - ((unsigned char *)in)[0] = 0x80; - for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) { - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out, (unsigned char *)in); - ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out); - ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1); - ((unsigned char *)in)[15] += 1; + in[0] = 0x80; + for (i = 0; i < ARRAY_SIZE(tctx->nhkey); i += 2) { + crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in); + tctx->nhkey[i] = be64_to_cpu(out[0]); + tctx->nhkey[i+1] = be64_to_cpu(out[1]); + in[15]++; } /* Fill poly key */ - ((unsigned char *)in)[0] = 0xC0; - in[1] = 0; - for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) { - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out, (unsigned char *)in); - ctx->__vmac_ctx.polytmp[i] = - ctx->__vmac_ctx.polykey[i] = - be64_to_cpup(out) & mpoly; - ctx->__vmac_ctx.polytmp[i+1] = - ctx->__vmac_ctx.polykey[i+1] = - be64_to_cpup(out+1) & mpoly; - ((unsigned char *)in)[15] += 1; + in[0] = 0xC0; + in[15] = 0; + for (i = 0; i < ARRAY_SIZE(tctx->polykey); i += 2) { + crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in); + tctx->polykey[i] = be64_to_cpu(out[0]) & mpoly; + tctx->polykey[i+1] = be64_to_cpu(out[1]) & mpoly; + in[15]++; } /* Fill ip key */ - ((unsigned char *)in)[0] = 0xE0; - in[1] = 0; - for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) { + in[0] = 0xE0; + in[15] = 0; + for (i = 0; i < ARRAY_SIZE(tctx->l3key); i += 2) { do { - crypto_cipher_encrypt_one(ctx->child, - (unsigned char *)out, (unsigned char *)in); - ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out); - ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1); - ((unsigned char *)in)[15] += 1; - } while (ctx->__vmac_ctx.l3key[i] >= p64 - || ctx->__vmac_ctx.l3key[i+1] >= p64); + crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in); + tctx->l3key[i] = be64_to_cpu(out[0]); + tctx->l3key[i+1] = be64_to_cpu(out[1]); + in[15]++; + } while (tctx->l3key[i] >= p64 || tctx->l3key[i+1] >= p64); } - /* Invalidate nonce/aes cache and reset other elements */ - ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */ - ctx->__vmac_ctx.cached_nonce[1] = (u64)0; /* Ensure illegal nonce */ - ctx->__vmac_ctx.first_block_processed = 0; - - return err; -} - -static int vmac_setkey(struct crypto_shash *parent, - const u8 *key, unsigned int keylen) -{ - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - - if (keylen != VMAC_KEY_LEN) { - crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return vmac_set_key((u8 *)key, ctx); -} - -static int vmac_init(struct shash_desc *pdesc) -{ - return 0; -} - -static int vmac_update(struct shash_desc *pdesc, const u8 *p, - unsigned int len) -{ - struct crypto_shash *parent = pdesc->tfm; - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - int expand; - int min; - - expand = VMAC_NHBYTES - ctx->partial_size > 0 ? - VMAC_NHBYTES - ctx->partial_size : 0; - - min = len < expand ? len : expand; - - memcpy(ctx->partial + ctx->partial_size, p, min); - ctx->partial_size += min; - - if (len < expand) - return 0; - - vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx); - ctx->partial_size = 0; - - len -= expand; - p += expand; - - if (len % VMAC_NHBYTES) { - memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES), - len % VMAC_NHBYTES); - ctx->partial_size = len % VMAC_NHBYTES; - } - - vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx); - return 0; } -static int vmac_final(struct shash_desc *pdesc, u8 *out) +static int vmac_init(struct shash_desc *desc) { - struct crypto_shash *parent = pdesc->tfm; - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - vmac_t mac; - u8 nonce[16] = {}; + const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct vmac_desc_ctx *dctx = shash_desc_ctx(desc); - /* vmac() ends up accessing outside the array bounds that - * we specify. In appears to access up to the next 2-word - * boundary. We'll just be uber cautious and zero the - * unwritten bytes in the buffer. - */ - if (ctx->partial_size) { - memset(ctx->partial + ctx->partial_size, 0, - VMAC_NHBYTES - ctx->partial_size); + dctx->partial_size = 0; + dctx->first_block_processed = false; + memcpy(dctx->polytmp, tctx->polykey, sizeof(dctx->polytmp)); + return 0; +} + +static int vmac_update(struct shash_desc *desc, const u8 *p, unsigned int len) +{ + const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct vmac_desc_ctx *dctx = shash_desc_ctx(desc); + unsigned int n; + + if (dctx->partial_size) { + n = min(len, VMAC_NHBYTES - dctx->partial_size); + memcpy(&dctx->partial[dctx->partial_size], p, n); + dctx->partial_size += n; + p += n; + len -= n; + if (dctx->partial_size == VMAC_NHBYTES) { + vhash_blocks(tctx, dctx, dctx->partial_words, 1); + dctx->partial_size = 0; + } } - mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx); - memcpy(out, &mac, sizeof(vmac_t)); - memzero_explicit(&mac, sizeof(vmac_t)); - memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); - ctx->partial_size = 0; + + if (len >= VMAC_NHBYTES) { + n = round_down(len, VMAC_NHBYTES); + /* TODO: 'p' may be misaligned here */ + vhash_blocks(tctx, dctx, (const __le64 *)p, n / VMAC_NHBYTES); + p += n; + len -= n; + } + + if (len) { + memcpy(dctx->partial, p, len); + dctx->partial_size = len; + } + + return 0; +} + +static u64 vhash_final(const struct vmac_tfm_ctx *tctx, + struct vmac_desc_ctx *dctx) +{ + unsigned int partial = dctx->partial_size; + u64 ch = dctx->polytmp[0]; + u64 cl = dctx->polytmp[1]; + + /* L1 and L2-hash the final block if needed */ + if (partial) { + /* Zero-pad to next 128-bit boundary */ + unsigned int n = round_up(partial, 16); + u64 rh, rl; + + memset(&dctx->partial[partial], 0, n - partial); + nh_16(dctx->partial_words, tctx->nhkey, n / 8, rh, rl); + rh &= m62; + if (dctx->first_block_processed) + poly_step(ch, cl, tctx->polykey[0], tctx->polykey[1], + rh, rl); + else + ADD128(ch, cl, rh, rl); + } + + /* L3-hash the 128-bit output of L2-hash */ + return l3hash(ch, cl, tctx->l3key[0], tctx->l3key[1], partial * 8); +} + +static int vmac_final(struct shash_desc *desc, u8 *out) +{ + const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct vmac_desc_ctx *dctx = shash_desc_ctx(desc); + static const u8 nonce[16] = {}; /* TODO: this is insecure */ + union { + u8 bytes[16]; + __be64 pads[2]; + } block; + int index; + u64 hash, pad; + + /* Finish calculating the VHASH of the message */ + hash = vhash_final(tctx, dctx); + + /* Generate pseudorandom pad by encrypting the nonce */ + memcpy(&block, nonce, 16); + index = block.bytes[15] & 1; + block.bytes[15] &= ~1; + crypto_cipher_encrypt_one(tctx->cipher, block.bytes, block.bytes); + pad = be64_to_cpu(block.pads[index]); + + /* The VMAC is the sum of VHASH and the pseudorandom pad */ + put_unaligned_le64(hash + pad, out); return 0; } static int vmac_init_tfm(struct crypto_tfm *tfm) { - struct crypto_cipher *cipher; - struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); + struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm); + struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctx->child = cipher; + tctx->cipher = cipher; return 0; } static void vmac_exit_tfm(struct crypto_tfm *tfm) { - struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); - crypto_free_cipher(ctx->child); + struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm); + + crypto_free_cipher(tctx->cipher); } static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) @@ -674,11 +627,12 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_alignmask = alg->cra_alignmask; - inst->alg.digestsize = sizeof(vmac_t); - inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t); + inst->alg.base.cra_ctxsize = sizeof(struct vmac_tfm_ctx); inst->alg.base.cra_init = vmac_init_tfm; inst->alg.base.cra_exit = vmac_exit_tfm; + inst->alg.descsize = sizeof(struct vmac_desc_ctx); + inst->alg.digestsize = VMAC_TAG_LEN / 8; inst->alg.init = vmac_init; inst->alg.update = vmac_update; inst->alg.final = vmac_final; diff --git a/include/crypto/vmac.h b/include/crypto/vmac.h deleted file mode 100644 index 6b700c7b2fe1..000000000000 --- a/include/crypto/vmac.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Modified to interface to the Linux kernel - * Copyright (c) 2009, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - -#ifndef __CRYPTO_VMAC_H -#define __CRYPTO_VMAC_H - -/* -------------------------------------------------------------------------- - * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. - * This implementation is herby placed in the public domain. - * The authors offers no warranty. Use at your own risk. - * Please send bug reports to the authors. - * Last modified: 17 APR 08, 1700 PDT - * ----------------------------------------------------------------------- */ - -/* - * User definable settings. - */ -#define VMAC_TAG_LEN 64 -#define VMAC_KEY_SIZE 128/* Must be 128, 192 or 256 */ -#define VMAC_KEY_LEN (VMAC_KEY_SIZE/8) -#define VMAC_NHBYTES 128/* Must 2^i for any 3 < i < 13 Standard = 128*/ - -/* - * This implementation uses u32 and u64 as names for unsigned 32- - * and 64-bit integer types. These are defined in C99 stdint.h. The - * following may need adaptation if you are not running a C99 or - * Microsoft C environment. - */ -struct vmac_ctx { - u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; - u64 polykey[2*VMAC_TAG_LEN/64]; - u64 l3key[2*VMAC_TAG_LEN/64]; - u64 polytmp[2*VMAC_TAG_LEN/64]; - u64 cached_nonce[2]; - u64 cached_aes[2]; - int first_block_processed; -}; - -typedef u64 vmac_t; - -struct vmac_ctx_t { - struct crypto_cipher *child; - struct vmac_ctx __vmac_ctx; - u8 partial[VMAC_NHBYTES]; /* partial block */ - int partial_size; /* size of the partial block */ -}; - -#endif /* __CRYPTO_VMAC_H */ From a55a2512827fde609f4844be33d7e6dce7b2a4cd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Jul 2018 10:54:57 -0700 Subject: [PATCH 322/519] crypto: blkcipher - fix crash flushing dcache in error path commit 0868def3e4100591e7a1fdbf3eed1439cc8f7ca3 upstream. Like the skcipher_walk case: scatterwalk_done() is only meant to be called after a nonzero number of bytes have been processed, since scatterwalk_pagedone() will flush the dcache of the *previous* page. But in the error case of blkcipher_walk_done(), e.g. if the input wasn't an integer number of blocks, scatterwalk_done() was actually called after advancing 0 bytes. This caused a crash ("BUG: unable to handle kernel paging request") during '!PageSlab(page)' on architectures like arm and arm64 that define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was page-aligned as in that case walk->offset == 0. Fix it by reorganizing blkcipher_walk_done() to skip the scatterwalk_advance() and scatterwalk_done() if an error has occurred. This bug was found by syzkaller fuzzing. Reproducer, assuming ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE: #include #include #include int main() { struct sockaddr_alg addr = { .salg_type = "skcipher", .salg_name = "ecb(aes-generic)", }; char buffer[4096] __attribute__((aligned(4096))) = { 0 }; int fd; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *)&addr, sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, buffer, 16); fd = accept(fd, NULL, NULL); write(fd, buffer, 15); read(fd, buffer, 15); } Reported-by: Liu Chao Fixes: 5cde0af2a982 ("[CRYPTO] cipher: Added block cipher type") Cc: # v2.6.19+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/blkcipher.c | 54 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index dca7bc87dad9..2d08e59b3212 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -71,19 +71,18 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk, - unsigned int bsize) +static inline void blkcipher_done_slow(struct blkcipher_walk *walk, + unsigned int bsize) { u8 *addr; addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = blkcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, 1); - return bsize; } -static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, - unsigned int n) +static inline void blkcipher_done_fast(struct blkcipher_walk *walk, + unsigned int n) { if (walk->flags & BLKCIPHER_WALK_COPY) { blkcipher_map_dst(walk); @@ -97,49 +96,48 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, scatterwalk_advance(&walk->in, n); scatterwalk_advance(&walk->out, n); - - return n; } int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err) { - unsigned int nbytes = 0; + unsigned int n; /* bytes processed */ + bool more; - if (likely(err >= 0)) { - unsigned int n = walk->nbytes - err; + if (unlikely(err < 0)) + goto finish; - if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) - n = blkcipher_done_fast(walk, n); - else if (WARN_ON(err)) { + n = walk->nbytes - err; + walk->total -= n; + more = (walk->total != 0); + + if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) { + blkcipher_done_fast(walk, n); + } else { + if (WARN_ON(err)) { + /* unexpected case; didn't process all bytes */ err = -EINVAL; - goto err; - } else - n = blkcipher_done_slow(walk, n); - - nbytes = walk->total - n; - err = 0; + goto finish; + } + blkcipher_done_slow(walk, n); } - scatterwalk_done(&walk->in, 0, nbytes); - scatterwalk_done(&walk->out, 1, nbytes); + scatterwalk_done(&walk->in, 0, more); + scatterwalk_done(&walk->out, 1, more); -err: - walk->total = nbytes; - walk->nbytes = nbytes; - - if (nbytes) { + if (more) { crypto_yield(desc->flags); return blkcipher_walk_next(desc, walk); } - + err = 0; +finish: + walk->nbytes = 0; if (walk->iv != desc->info) memcpy(desc->info, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) free_page((unsigned long)walk->page); - return err; } EXPORT_SYMBOL_GPL(blkcipher_walk_done); From 930787c9cdd7179025f10ef45d9957f1ef38880b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Jul 2018 10:54:58 -0700 Subject: [PATCH 323/519] crypto: ablkcipher - fix crash flushing dcache in error path commit 318abdfbe708aaaa652c79fb500e9bd60521f9dc upstream. Like the skcipher_walk and blkcipher_walk cases: scatterwalk_done() is only meant to be called after a nonzero number of bytes have been processed, since scatterwalk_pagedone() will flush the dcache of the *previous* page. But in the error case of ablkcipher_walk_done(), e.g. if the input wasn't an integer number of blocks, scatterwalk_done() was actually called after advancing 0 bytes. This caused a crash ("BUG: unable to handle kernel paging request") during '!PageSlab(page)' on architectures like arm and arm64 that define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was page-aligned as in that case walk->offset == 0. Fix it by reorganizing ablkcipher_walk_done() to skip the scatterwalk_advance() and scatterwalk_done() if an error has occurred. Reported-by: Liu Chao Fixes: bf06099db18a ("crypto: skcipher - Add ablkcipher_walk interfaces") Cc: # v2.6.35+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ablkcipher.c | 57 +++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index e5b5721809e2..149e7a7f04fe 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -73,11 +73,9 @@ static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk, - unsigned int bsize) +static inline void ablkcipher_done_slow(struct ablkcipher_walk *walk, + unsigned int n) { - unsigned int n = bsize; - for (;;) { unsigned int len_this_page = scatterwalk_pagelen(&walk->out); @@ -89,17 +87,13 @@ static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk, n -= len_this_page; scatterwalk_start(&walk->out, sg_next(walk->out.sg)); } - - return bsize; } -static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk, - unsigned int n) +static inline void ablkcipher_done_fast(struct ablkcipher_walk *walk, + unsigned int n) { scatterwalk_advance(&walk->in, n); scatterwalk_advance(&walk->out, n); - - return n; } static int ablkcipher_walk_next(struct ablkcipher_request *req, @@ -109,39 +103,40 @@ int ablkcipher_walk_done(struct ablkcipher_request *req, struct ablkcipher_walk *walk, int err) { struct crypto_tfm *tfm = req->base.tfm; - unsigned int nbytes = 0; + unsigned int n; /* bytes processed */ + bool more; - if (likely(err >= 0)) { - unsigned int n = walk->nbytes - err; + if (unlikely(err < 0)) + goto finish; - if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) - n = ablkcipher_done_fast(walk, n); - else if (WARN_ON(err)) { + n = walk->nbytes - err; + walk->total -= n; + more = (walk->total != 0); + + if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) { + ablkcipher_done_fast(walk, n); + } else { + if (WARN_ON(err)) { + /* unexpected case; didn't process all bytes */ err = -EINVAL; - goto err; - } else - n = ablkcipher_done_slow(walk, n); - - nbytes = walk->total - n; - err = 0; + goto finish; + } + ablkcipher_done_slow(walk, n); } - scatterwalk_done(&walk->in, 0, nbytes); - scatterwalk_done(&walk->out, 1, nbytes); + scatterwalk_done(&walk->in, 0, more); + scatterwalk_done(&walk->out, 1, more); -err: - walk->total = nbytes; - walk->nbytes = nbytes; - - if (nbytes) { + if (more) { crypto_yield(req->base.flags); return ablkcipher_walk_next(req, walk); } - + err = 0; +finish: + walk->nbytes = 0; if (walk->iv != req->info) memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize); kfree(walk->iv_buffer); - return err; } EXPORT_SYMBOL_GPL(ablkcipher_walk_done); From 59e68641add22a2b6847b653f8e823ccb4042d0a Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Sep 2017 00:13:08 -0500 Subject: [PATCH 324/519] ASoC: Intel: cht_bsw_max98090_ti: Fix jack initialization commit 3bbda5a38601f7675a214be2044e41d7749e6c7b upstream. If the ts3a227e audio accessory detection hardware is present and its driver probed, the jack needs to be created before enabling jack detection in the ts3a227e driver. With this patch, the jack is instantiated in the max98090 headset init function if the ts3a227e is present. This fixes a null pointer dereference as the jack detection enabling function in the ts3a driver was called before the jack is created. [minor correction to keep error handling on jack creation the same as before by Pierre Bossart] Signed-off-by: Thierry Escande Signed-off-by: Pierre-Louis Bossart Acked-By: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/cht_bsw_max98090_ti.c | 45 ++++++++++++++------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c index 4e2fcf188dd1..01a573a063d1 100644 --- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c +++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c @@ -131,23 +131,19 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime) struct cht_mc_private *ctx = snd_soc_card_get_drvdata(runtime->card); struct snd_soc_jack *jack = &ctx->jack; - /** - * TI supports 4 butons headset detection - * KEY_MEDIA - * KEY_VOICECOMMAND - * KEY_VOLUMEUP - * KEY_VOLUMEDOWN - */ - if (ctx->ts3a227e_present) - jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE | - SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3; - else - jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE; + if (ctx->ts3a227e_present) { + /* + * The jack has already been created in the + * cht_max98090_headset_init() function. + */ + snd_soc_jack_notifier_register(jack, &cht_jack_nb); + return 0; + } + + jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE; ret = snd_soc_card_jack_new(runtime->card, "Headset Jack", jack_type, jack, NULL, 0); - if (ret) { dev_err(runtime->dev, "Headset Jack creation failed %d\n", ret); return ret; @@ -203,6 +199,27 @@ static int cht_max98090_headset_init(struct snd_soc_component *component) { struct snd_soc_card *card = component->card; struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card); + struct snd_soc_jack *jack = &ctx->jack; + int jack_type; + int ret; + + /* + * TI supports 4 butons headset detection + * KEY_MEDIA + * KEY_VOICECOMMAND + * KEY_VOLUMEUP + * KEY_VOLUMEDOWN + */ + jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3; + + ret = snd_soc_card_jack_new(card, "Headset Jack", jack_type, + jack, NULL, 0); + if (ret) { + dev_err(card->dev, "Headset Jack creation failed %d\n", ret); + return ret; + } return ts3a227e_enable_jack_detect(component, &ctx->jack); } From 17c1e0b1f6a161cc4f533d4869ff574273dbfe8d Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 31 Jul 2018 15:02:13 -0700 Subject: [PATCH 325/519] Bluetooth: hidp: buffer overflow in hidp_process_report commit 7992c18810e568b95c869b227137a2215702a805 upstream. CVE-2018-9363 The buffer length is unsigned at all layers, but gets cast to int and checked in hidp_process_report and can lead to a buffer overflow. Switch len parameter to unsigned int to resolve issue. This affects 3.18 and newer kernels. Signed-off-by: Mark Salyzyn Fixes: a4b1b5877b514b276f0f31efe02388a9c2836728 ("HID: Bluetooth: hidp: make sure input buffers are big enough") Cc: Marcel Holtmann Cc: Johan Hedberg Cc: "David S. Miller" Cc: Kees Cook Cc: Benjamin Tissoires Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: security@kernel.org Cc: kernel-team@android.com Acked-by: Kees Cook Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 1fc076420d1e..1811f8e7ddf4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -431,8 +431,8 @@ static void hidp_del_timer(struct hidp_session *session) del_timer(&session->timer); } -static void hidp_process_report(struct hidp_session *session, - int type, const u8 *data, int len, int intr) +static void hidp_process_report(struct hidp_session *session, int type, + const u8 *data, unsigned int len, int intr) { if (len > HID_MAX_BUFFER_SIZE) len = HID_MAX_BUFFER_SIZE; From 29f475cbff9b56c83821344b02d5152847839bc2 Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 27 Jun 2018 08:13:47 -0600 Subject: [PATCH 326/519] ioremap: Update pgtable free interfaces with addr commit 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc upstream. The following kernel panic was observed on ARM64 platform due to a stale TLB entry. 1. ioremap with 4K size, a valid pte page table is set. 2. iounmap it, its pte entry is set to 0. 3. ioremap the same address with 2M size, update its pmd entry with a new value. 4. CPU may hit an exception because the old pmd entry is still in TLB, which leads to a kernel panic. Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page table") has addressed this panic by falling to pte mappings in the above case on ARM64. To support pmd mappings in all cases, TLB purge needs to be performed in this case on ARM64. Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page() so that TLB purge can be added later in seprate patches. [toshi.kani@hpe.com: merge changes, rewrite patch description] Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Chintan Pandya Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Will Deacon Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 4 ++-- arch/x86/mm/pgtable.c | 12 +++++++----- include/asm-generic/pgtable.h | 8 ++++---- lib/ioremap.c | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 51ac84e0812d..e9d96b028766 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -699,12 +699,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8e0378a00d50..8f5ef9d39555 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -680,11 +680,12 @@ int pmd_clear_huge(pmd_t *pmd) /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. + * @addr: Virtual address associated with pud. * * Context: The pud range has been unmaped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd; int i; @@ -695,7 +696,7 @@ int pud_free_pmd_page(pud_t *pud) pmd = (pmd_t *)pud_page_vaddr(*pud); for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i])) + if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) return 0; pud_clear(pud); @@ -707,11 +708,12 @@ int pud_free_pmd_page(pud_t *pud) /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. + * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmaped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; @@ -727,7 +729,7 @@ int pmd_free_pte_page(pmd_t *pmd) #else /* !CONFIG_X86_64 */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } @@ -736,7 +738,7 @@ int pud_free_pmd_page(pud_t *pud) * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one(). */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index dabecb661264..53a47d75cc43 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -770,8 +770,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -int pud_free_pmd_page(pud_t *pud); -int pmd_free_pte_page(pmd_t *pmd); +int pud_free_pmd_page(pud_t *pud, unsigned long addr); +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { @@ -789,11 +789,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } -static inline int pud_free_pmd_page(pud_t *pud) +static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; } -static inline int pmd_free_pte_page(pmd_t *pmd) +static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return 0; } diff --git a/lib/ioremap.c b/lib/ioremap.c index 5323b59ca393..b9462037868d 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -84,7 +84,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd)) { + pmd_free_pte_page(pmd, addr)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -111,7 +111,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud)) { + pud_free_pmd_page(pud, addr)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } From 5b9b4a8cca9c9fd9a035edbbe6eea3d1cf687981 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Jun 2018 08:13:48 -0600 Subject: [PATCH 327/519] x86/mm: Add TLB purge to free pmd/pte page interfaces commit 5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e upstream. ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates a pud / pmd map. The following preconditions are met at their entry. - All pte entries for a target pud/pmd address range have been cleared. - System-wide TLB purges have been peformed for a target pud/pmd address range. The preconditions assure that there is no stale TLB entry for the range. Speculation may not cache TLB entries since it requires all levels of page entries, including ptes, to have P & A-bits set for an associated address. However, speculation may cache pud/pmd entries (paging-structure caches) when they have P-bit set. Add a system-wide TLB purge (INVLPG) to a single page after clearing pud/pmd entry's P-bit. SDM 4.10.4.1, Operation that Invalidate TLBs and Paging-Structure Caches, states that: INVLPG invalidates all paging-structure caches associated with the current PCID regardless of the liner addresses to which they correspond. Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: cpandya@codeaurora.org Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-4-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8f5ef9d39555..55c7446311a7 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -682,24 +682,44 @@ int pmd_clear_huge(pmd_t *pmd) * @pud: Pointer to a PUD. * @addr: Virtual address associated with pud. * - * Context: The pud range has been unmaped and TLB purged. + * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. + * + * NOTE: Callers must allow a single page allocation. */ int pud_free_pmd_page(pud_t *pud, unsigned long addr) { - pmd_t *pmd; + pmd_t *pmd, *pmd_sv; + pte_t *pte; int i; if (pud_none(*pud)) return 1; pmd = (pmd_t *)pud_page_vaddr(*pud); + pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); + if (!pmd_sv) + return 0; - for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) - return 0; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_sv[i] = pmd[i]; + if (!pmd_none(pmd[i])) + pmd_clear(&pmd[i]); + } pud_clear(pud); + + /* INVLPG to clear all paging-structure caches */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd_sv[i])) { + pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); + free_page((unsigned long)pte); + } + } + + free_page((unsigned long)pmd_sv); free_page((unsigned long)pmd); return 1; @@ -710,7 +730,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) * @pmd: Pointer to a PMD. * @addr: Virtual address associated with pmd. * - * Context: The pmd range has been unmaped and TLB purged. + * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) @@ -722,6 +742,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) pte = (pte_t *)pmd_page_vaddr(*pmd); pmd_clear(pmd); + + /* INVLPG to clear all paging-structure caches */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + free_page((unsigned long)pte); return 1; From 45cf1802a1057650768430cf3168ff7a02163338 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Aug 2018 20:56:45 +0200 Subject: [PATCH 328/519] Linux 4.4.149 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cacb45c3173a..e7c46ece5f27 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 148 +SUBLEVEL = 149 EXTRAVERSION = NAME = Blurry Fish Butt From 4cdedeefa38f45299b18ae692426d5baaff6b785 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Aug 2018 10:27:36 -0700 Subject: [PATCH 329/519] x86/speculation/l1tf: Exempt zeroed PTEs from inversion commit f19f5c49bbc3ffcc9126cc245fc1b24cc29f4a37 upstream. It turns out that we should *not* invert all not-present mappings, because the all zeroes case is obviously special. clear_page() does not undergo the XOR logic to invert the address bits, i.e. PTE, PMD and PUD entries that have not been individually written will have val=0 and so will trigger __pte_needs_invert(). As a result, {pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones (adjusted by the max PFN mask) instead of zero. A zeroed entry is ok because the page at physical address 0 is reserved early in boot specifically to mitigate L1TF, so explicitly exempt them from the inversion when reading the PFN. Manifested as an unexpected mprotect(..., PROT_NONE) failure when called on a VMA that has VM_PFNMAP and was mmap'd to as something other than PROT_NONE but never used. mprotect() sends the PROT_NONE request down prot_none_walk(), which walks the PTEs to check the PFNs. prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns -EACCES because it thinks mprotect() is trying to adjust a high MMIO address. [ This is a very modified version of Sean's original patch, but all credit goes to Sean for doing this and also pointing out that sometimes the __pte_needs_invert() function only gets the protection bits, not the full eventual pte. But zero remains special even in just protection bits, so that's ok. - Linus ] Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings") Signed-off-by: Sean Christopherson Acked-by: Andi Kleen Cc: Thomas Gleixner Cc: Josh Poimboeuf Cc: Michal Hocko Cc: Vlastimil Babka Cc: Dave Hansen Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-invert.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index 44b1203ece12..a0c1525f1b6f 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -4,9 +4,18 @@ #ifndef __ASSEMBLY__ +/* + * A clear pte value is special, and doesn't get inverted. + * + * Note that even users that only pass a pgprot_t (rather + * than a full pte) won't trigger the special zero case, + * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED + * set. So the all zero case really is limited to just the + * cleared page table entry case. + */ static inline bool __pte_needs_invert(u64 val) { - return !(val & _PAGE_PRESENT); + return val && !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ From 7dc18ebc3101229d5238a2dc740804cd4836b383 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Aug 2018 10:45:38 +0200 Subject: [PATCH 330/519] Linux 4.4.150 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e7c46ece5f27..7789195c6a59 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 149 +SUBLEVEL = 150 EXTRAVERSION = NAME = Blurry Fish Butt From f35e16c5977914d8c2e77277d554268b34bb4942 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 7 Aug 2018 20:03:57 +0300 Subject: [PATCH 331/519] dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart() [ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ] The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value can lead to undefined behavior [1]. In order to fix this use a gradual shift of the window with a 'while' loop, similar to what tcp_cwnd_restart() is doing. When comparing delta and RTO there is a minor difference between TCP and DCCP, the last one also invokes dccp_cwnd_restart() and reduces 'cwnd' if delta equals RTO. That case is preserved in this change. [1]: [40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7 [40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int' [40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G W E 4.18.0-rc7.x86_64 #1 ... [40851.377176] Call Trace: [40851.408503] dump_stack+0xf1/0x17b [40851.451331] ? show_regs_print_info+0x5/0x5 [40851.503555] ubsan_epilogue+0x9/0x7c [40851.548363] __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4 [40851.617109] ? __ubsan_handle_load_invalid_value+0x18f/0x18f [40851.686796] ? xfrm4_output_finish+0x80/0x80 [40851.739827] ? lock_downgrade+0x6d0/0x6d0 [40851.789744] ? xfrm4_prepare_output+0x160/0x160 [40851.845912] ? ip_queue_xmit+0x810/0x1db0 [40851.895845] ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] [40851.963530] ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] [40852.029063] dccp_xmit_packet+0x1d3/0x720 [dccp] [40852.086254] dccp_write_xmit+0x116/0x1d0 [dccp] [40852.142412] dccp_sendmsg+0x428/0xb20 [dccp] [40852.195454] ? inet_dccp_listen+0x200/0x200 [dccp] [40852.254833] ? sched_clock+0x5/0x10 [40852.298508] ? sched_clock+0x5/0x10 [40852.342194] ? inet_create+0xdf0/0xdf0 [40852.388988] sock_sendmsg+0xd9/0x160 ... Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 86a2ed0fb219..161dfcf86126 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); u32 cwnd = hc->tx_cwnd, restart_cwnd, iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + s32 delta = now - hc->tx_lsndtime; hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); /* don't reduce cwnd below the initial window (IW) */ restart_cwnd = min(cwnd, iwnd); - cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; - hc->tx_cwnd = max(cwnd, restart_cwnd); + while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) + cwnd >>= 1; + hc->tx_cwnd = max(cwnd, restart_cwnd); hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; From 4aef9b0fffd2295b1c523ebf43ca6b46e9cc8ffa Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 10 Aug 2018 11:14:56 -0700 Subject: [PATCH 332/519] l2tp: use sk_dst_check() to avoid race on sk->sk_dst_cache [ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ] In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a UDP socket. User could call sendmsg() on both this tunnel and the UDP socket itself concurrently. As l2tp_xmit_skb() holds socket lock and call __sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is lockless and call sk_dst_check() to refresh sk->sk_dst_cache, there could be a race and cause the dst cache to be freed multiple times. So we fix l2tp side code to always call sk_dst_check() to garantee xchg() is called when refreshing sk->sk_dst_cache to avoid race conditions. Syzkaller reported stack trace: BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline] BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline] BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline] BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829 CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] atomic_fetch_add_unless include/linux/atomic.h:575 [inline] atomic_add_unless include/linux/atomic.h:597 [inline] dst_hold_safe include/net/dst.h:308 [inline] ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 rt6_get_pcpu_route net/ipv6/route.c:1249 [inline] ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098 fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126 ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117 udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x51d/0x930 net/socket.c:2115 __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210 __do_sys_sendmmsg net/socket.c:2239 [inline] __se_sys_sendmmsg net/socket.c:2236 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x446a29 Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29 RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003 RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001 Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid") Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Cc: Guillaume Nault Cc: David Ahern Cc: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 92df832a1896..591d18785285 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1145,7 +1145,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Get routing info from the tunnel socket */ skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); + skb_dst_set(skb, sk_dst_check(sk, 0)); inet = inet_sk(sk); fl = &inet->cork.fl; From 813fb06fe60d0a56a5481fb81793142fc00bf4bd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 7 Aug 2018 12:41:38 -0700 Subject: [PATCH 333/519] llc: use refcount_inc_not_zero() for llc_sap_find() [ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ] llc_sap_put() decreases the refcnt before deleting sap from the global list. Therefore, there is a chance llc_sap_find() could find a sap with zero refcnt in this global list. Close this race condition by checking if refcnt is zero or not in llc_sap_find(), if it is zero then it is being removed so we can just treat it as gone. Reported-by: Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/llc.h | 5 +++++ net/llc/llc_core.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/llc.h b/include/net/llc.h index e8e61d4fb458..82d989995d18 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct llc_sap *sap) atomic_inc(&sap->refcnt); } +static inline bool llc_sap_hold_safe(struct llc_sap *sap) +{ + return atomic_inc_not_zero(&sap->refcnt); +} + void llc_sap_close(struct llc_sap *sap); static inline void llc_sap_put(struct llc_sap *sap) diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 842851cef698..e896a2c53b12 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); - if (sap) - llc_sap_hold(sap); + if (!sap || !llc_sap_hold_safe(sap)) + sap = NULL; rcu_read_unlock_bh(); return sap; } From 0adfdb9af8ec7cc0c8a35dfe8a20d6f8d27ddd08 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Aug 2018 18:44:04 +0800 Subject: [PATCH 334/519] net_sched: Fix missing res info when create new tc_index filter [ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ] Li Shuang reported the following warn: [ 733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq] [ 733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l [ 733.574155] syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [ 733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 [ 733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 [ 733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq] [ 733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84 [ 733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202 [ 733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f [ 733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800 [ 733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000 [ 733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001 [ 733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200 [ 733.681430] FS: 00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000 [ 733.690456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0 [ 733.704826] Call Trace: [ 733.707554] cbq_destroy+0xa1/0xd0 [sch_cbq] [ 733.712318] qdisc_destroy+0x62/0x130 [ 733.716401] dsmark_destroy+0x2a/0x70 [sch_dsmark] [ 733.721745] qdisc_destroy+0x62/0x130 [ 733.725829] qdisc_graft+0x3ba/0x470 [ 733.729817] tc_get_qdisc+0x2a6/0x2c0 [ 733.733901] ? cred_has_capability+0x7d/0x130 [ 733.738761] rtnetlink_rcv_msg+0x263/0x2d0 [ 733.743330] ? rtnl_calcit.isra.30+0x110/0x110 [ 733.748287] netlink_rcv_skb+0x4d/0x130 [ 733.752576] netlink_unicast+0x1a3/0x250 [ 733.756949] netlink_sendmsg+0x2ae/0x3a0 [ 733.761324] sock_sendmsg+0x36/0x40 [ 733.765213] ___sys_sendmsg+0x26f/0x2d0 [ 733.769493] ? handle_pte_fault+0x586/0xdf0 [ 733.774158] ? __handle_mm_fault+0x389/0x500 [ 733.778919] ? __sys_sendmsg+0x5e/0xa0 [ 733.783099] __sys_sendmsg+0x5e/0xa0 [ 733.787087] do_syscall_64+0x5b/0x180 [ 733.791171] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 733.796805] RIP: 0033:0x7f9117f23f10 [ 733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 [ 733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10 [ 733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003 [ 733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003 [ 733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000 [ 733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 [ 733.870121] ---[ end trace 28edd4aad712ddca ]--- This is because we didn't update f->result.res when create new filter. Then in tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class(). Fix it by updating f->result.res when create new filter. Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex") Reported-by: Li Shuang Signed-off-by: Hangbin Liu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 403746b20263..e3b48ddadb45 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -398,6 +398,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; + f->result.res = r->res; tcf_exts_change(tp, &f->result.exts, &r->exts); fp = cp->h + (handle % cp->hash); From 62209d1f272c2b134765301f279fda7364b07ddd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Aug 2018 11:06:02 -0700 Subject: [PATCH 335/519] vsock: split dwork to avoid reinitializations [ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ] syzbot reported that we reinitialize an active delayed work in vsock_stream_connect(): ODEBUG: init active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414 WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329 debug_print_object+0x16a/0x210 lib/debugobjects.c:326 The pattern is apparently wrong, we should only initialize the dealyed work once and could repeatly schedule it. So we have to move out the initializations to allocation side. And to avoid confusion, we can split the shared dwork into two, instead of re-using the same one. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reported-by: Cc: Andy king Cc: Stefan Hajnoczi Cc: Jorgen Hansen Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/af_vsock.h | 4 ++-- net/vmw_vsock/af_vsock.c | 15 ++++++++------- net/vmw_vsock/vmci_transport.c | 3 +-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index e9eb2d6791b3..f7a35fcaaaf6 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -62,7 +62,8 @@ struct vsock_sock { struct list_head pending_links; struct list_head accept_queue; bool rejected; - struct delayed_work dwork; + struct delayed_work connect_work; + struct delayed_work pending_work; u32 peer_shutdown; bool sent_request; bool ignore_connecting_rst; @@ -73,7 +74,6 @@ struct vsock_sock { s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); -void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 60324f7c72bd..7f1d166ce612 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -430,14 +430,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode) return transport->shutdown(vsock_sk(sk), mode); } -void vsock_pending_work(struct work_struct *work) +static void vsock_pending_work(struct work_struct *work) { struct sock *sk; struct sock *listener; struct vsock_sock *vsk; bool cleanup; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, pending_work.work); sk = sk_vsock(vsk); listener = vsk->listener; cleanup = true; @@ -477,7 +477,6 @@ void vsock_pending_work(struct work_struct *work) sock_put(sk); sock_put(listener); } -EXPORT_SYMBOL_GPL(vsock_pending_work); /**** SOCKET OPERATIONS ****/ @@ -576,6 +575,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) return retval; } +static void vsock_connect_timeout(struct work_struct *work); + struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, @@ -618,6 +619,8 @@ struct sock *__vsock_create(struct net *net, vsk->sent_request = false; vsk->ignore_connecting_rst = false; vsk->peer_shutdown = 0; + INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); + INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); psk = parent ? vsock_sk(parent) : NULL; if (parent) { @@ -1094,7 +1097,7 @@ static void vsock_connect_timeout(struct work_struct *work) struct sock *sk; struct vsock_sock *vsk; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); lock_sock(sk); @@ -1195,9 +1198,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * timeout fires. */ sock_hold(sk); - INIT_DELAYED_WORK(&vsk->dwork, - vsock_connect_timeout); - schedule_delayed_work(&vsk->dwork, timeout); + schedule_delayed_work(&vsk->connect_work, timeout); /* Skip ahead to preserve error code set above. */ goto out_wait; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 662bdd20a748..589c8b9908a5 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1099,8 +1099,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vpending->listener = sk; sock_hold(sk); sock_hold(pending); - INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); - schedule_delayed_work(&vpending->dwork, HZ); + schedule_delayed_work(&vpending->pending_work, HZ); out: return err; From 51f6a134cfd1731baccecd49b8221833f782e4c9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Aug 2018 18:44:03 +0800 Subject: [PATCH 336/519] net_sched: fix NULL pointer dereference when delete tcindex filter [ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ] Li Shuang reported the following crash: [ 71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0 [ 71.284127] Oops: 0000 [#1] SMP PTI [ 71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 [ 71.295686] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 [ 71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex] [ 71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00 [ 71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282 [ 71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e [ 71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800 [ 71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000 [ 71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7 [ 71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600 [ 71.377161] FS: 00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000 [ 71.386188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0 [ 71.400558] Call Trace: [ 71.403299] tcindex_destroy_element+0x25/0x40 [cls_tcindex] [ 71.409611] tcindex_walk+0xbb/0x110 [cls_tcindex] [ 71.414953] tcindex_destroy+0x44/0x90 [cls_tcindex] [ 71.420492] ? tcindex_delete+0x280/0x280 [cls_tcindex] [ 71.426323] tcf_proto_destroy+0x16/0x40 [ 71.430696] tcf_chain_flush+0x51/0x70 [ 71.434876] tcf_block_put_ext.part.30+0x8f/0x1b0 [ 71.440122] tcf_block_put+0x4d/0x70 [ 71.444108] cbq_destroy+0x4d/0xd0 [sch_cbq] [ 71.448869] qdisc_destroy+0x62/0x130 [ 71.452951] dsmark_destroy+0x2a/0x70 [sch_dsmark] [ 71.458300] qdisc_destroy+0x62/0x130 [ 71.462373] qdisc_graft+0x3ba/0x470 [ 71.466359] tc_get_qdisc+0x2a6/0x2c0 [ 71.470443] ? cred_has_capability+0x7d/0x130 [ 71.475307] rtnetlink_rcv_msg+0x263/0x2d0 [ 71.479875] ? rtnl_calcit.isra.30+0x110/0x110 [ 71.484832] netlink_rcv_skb+0x4d/0x130 [ 71.489109] netlink_unicast+0x1a3/0x250 [ 71.493482] netlink_sendmsg+0x2ae/0x3a0 [ 71.497859] sock_sendmsg+0x36/0x40 [ 71.501748] ___sys_sendmsg+0x26f/0x2d0 [ 71.506029] ? handle_pte_fault+0x586/0xdf0 [ 71.510694] ? __handle_mm_fault+0x389/0x500 [ 71.515457] ? __sys_sendmsg+0x5e/0xa0 [ 71.519636] __sys_sendmsg+0x5e/0xa0 [ 71.523626] do_syscall_64+0x5b/0x180 [ 71.527711] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 71.533345] RIP: 0033:0x7f9d3e257f10 [ 71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 [ 71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10 [ 71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003 [ 71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003 [ 71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000 [ 71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 [ 71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni [ 71.685425] libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [ 71.697075] CR2: 0000000000000004 [ 71.700792] ---[ end trace f604eb1acacd978b ]--- Reproducer: tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2 tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64 tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 10 tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on tc qdisc add dev lo parent 2:1 pfifo limit 5 tc qdisc del dev lo root This is because in tcindex_set_parms, when there is no old_r, we set new exts to cr.exts. And we didn't set it to filter when r == &new_filter_result. Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer dereference as we didn't init exts. Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check. Then we don't need "cr" as there is no errout after that. Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter") Reported-by: Li Shuang Signed-off-by: Hangbin Liu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e3b48ddadb45..040d853f48b9 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -382,16 +382,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr.res, base); } - if (old_r) - tcf_exts_change(tp, &r->exts, &e); - else - tcf_exts_change(tp, &cr.exts, &e); - if (old_r && old_r != r) tcindex_filter_result_init(old_r); oldp = p; r->res = cr.res; + tcf_exts_change(tp, &r->exts, &e); + rcu_assign_pointer(tp->root, cp); if (r == &new_filter_result) { From 1b692b786b0c9b43959c7627fae2985fda1bed56 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sat, 28 Jul 2018 03:16:42 +0900 Subject: [PATCH 337/519] ALSA: hda - Sleep for 10ms after entering D3 on Conexant codecs commit f59cf9a0551dd954ad8b752461cf19d9789f4b1d upstream. On rare occasions, we are still noticing that the internal speaker spitting out spurious noises even after adding the problematic codec to the list. Adding a 10ms artificial delay before rebooting fixes the issue entirely. Patch for Realtek codecs also adds the same amount of delay after entering D3. Signed-off-by: Park Ju Hyung Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index cb19af145f46..fa9f85c1c6fb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -219,6 +219,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + msleep(10); } static void cx_auto_free(struct hda_codec *codec) From caf8fe5173f9bd20b14612c6a44221707f8a628d Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sat, 28 Jul 2018 03:16:21 +0900 Subject: [PATCH 338/519] ALSA: hda - Turn CX8200 into D3 as well upon reboot commit d77a4b4a5b0b2ebcbc9840995d91311ef28302ab upstream. As an equivalent codec with CX20724, CX8200 is also subject to the reboot bug. Late 2017 and 2018 LG Gram and some HP Spectre laptops are known victims to this issue, causing extremely loud noises upon reboot. Now that we know that this bug is subject to multiple codecs, fix the comment as well. Signed-off-by: Park Ju Hyung Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index fa9f85c1c6fb..a1a3ce8c3f56 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -205,6 +205,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) struct conexant_spec *spec = codec->spec; switch (codec->core.vendor_id) { + case 0x14f12008: /* CX8200 */ case 0x14f150f2: /* CX20722 */ case 0x14f150f4: /* CX20724 */ break; @@ -212,7 +213,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) return; } - /* Turn the CX20722 codec into D3 to avoid spurious noises + /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); From c48a18584d81b2fbdc1d7deba41525bfc9114255 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 17:10:11 +0200 Subject: [PATCH 339/519] ALSA: vx222: Fix invalid endian conversions commit fff71a4c050ba46e305d910c837b99ba1728135e upstream. The endian conversions used in vx2_dma_read() and vx2_dma_write() are superfluous and even wrong on big-endian machines, as inl() and outl() already do conversions. Kill them. Spotted by sparse, a warning like: sound/pci/vx222/vx222_ops.c:278:30: warning: incorrect type in argument 1 (different base types) Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/vx222/vx222_ops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c index 8e457ea27f89..1997bb048d8b 100644 --- a/sound/pci/vx222/vx222_ops.c +++ b/sound/pci/vx222/vx222_ops.c @@ -275,7 +275,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) { - outl(cpu_to_le32(*addr), port); + outl(*addr, port); addr++; } addr = (u32 *)runtime->dma_area; @@ -285,7 +285,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; count > 0; count--) { - outl(cpu_to_le32(*addr), port); + outl(*addr, port); addr++; } @@ -313,7 +313,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) - *addr++ = le32_to_cpu(inl(port)); + *addr++ = inl(port); addr = (u32 *)runtime->dma_area; pipe->hw_ptr = 0; } @@ -321,7 +321,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 2; /* in 32bit words */ /* Transfer using pseudo-dma. */ for (; count > 0; count--) - *addr++ = le32_to_cpu(inl(port)); + *addr++ = inl(port); vx2_release_pseudo_dma(chip); } From 8419b74a43fe4d6a3a4a6d33b00746f5446fb472 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Jul 2018 14:27:59 +0200 Subject: [PATCH 340/519] ALSA: virmidi: Fix too long output trigger loop commit 50e9ffb1996a5d11ff5040a266585bad4ceeca0a upstream. The virmidi output trigger tries to parse the all available bytes and process sequencer events as much as possible. In a normal situation, this is supposed to be relatively short, but a program may give a huge buffer and it'll take a long time in a single spin lock, which may eventually lead to a soft lockup. This patch simply adds a workaround, a cond_resched() call in the loop if applicable. A better solution would be to move the event processor into a work, but let's put a duct-tape quickly at first. Reported-and-tested-by: Dae R. Jeong Reported-by: syzbot+619d9f40141d826b097e@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_virmidi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index ef494ffc1369..975a7c939d2f 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -163,6 +163,7 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, int count, res; unsigned char buf[32], *pbuf; unsigned long flags; + bool check_resched = !in_atomic(); if (up) { vmidi->trigger = 1; @@ -200,6 +201,15 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } } + if (!check_resched) + continue; + /* do temporary unlock & cond_resched() for avoiding + * CPU soft lockup, which may happen via a write from + * a huge rawmidi buffer + */ + spin_unlock_irqrestore(&substream->runtime->lock, flags); + cond_resched(); + spin_lock_irqsave(&substream->runtime->lock, flags); } out: spin_unlock_irqrestore(&substream->runtime->lock, flags); From dbc8ab895259cc04ccd3406ccf4280418cd75b6b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 17:59:26 +0200 Subject: [PATCH 341/519] ALSA: cs5535audio: Fix invalid endian conversion commit 69756930f2de0457d51db7d505a1e4f40e9fd116 upstream. One place in cs5535audio_build_dma_packets() does an extra conversion via cpu_to_le32(); namely jmpprd_addr is passed to setup_prd() ops, which writes the value via cs_writel(). That is, the callback does the conversion by itself, and we don't need to convert beforehand. This patch fixes that bogus conversion. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/cs5535audio/cs5535audio.h | 6 +++--- sound/pci/cs5535audio/cs5535audio_pcm.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h index 0579daa62215..425d1b664029 100644 --- a/sound/pci/cs5535audio/cs5535audio.h +++ b/sound/pci/cs5535audio/cs5535audio.h @@ -66,9 +66,9 @@ struct cs5535audio_dma_ops { }; struct cs5535audio_dma_desc { - u32 addr; - u16 size; - u16 ctlreserved; + __le32 addr; + __le16 size; + __le16 ctlreserved; }; struct cs5535audio_dma { diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c index 9c2dc911d8d7..709f1c584d3e 100644 --- a/sound/pci/cs5535audio/cs5535audio_pcm.c +++ b/sound/pci/cs5535audio/cs5535audio_pcm.c @@ -158,8 +158,8 @@ static int cs5535audio_build_dma_packets(struct cs5535audio *cs5535au, lastdesc->addr = cpu_to_le32((u32) dma->desc_buf.addr); lastdesc->size = 0; lastdesc->ctlreserved = cpu_to_le16(PRD_JMP); - jmpprd_addr = cpu_to_le32(lastdesc->addr + - (sizeof(struct cs5535audio_dma_desc)*periods)); + jmpprd_addr = (u32)dma->desc_buf.addr + + sizeof(struct cs5535audio_dma_desc) * periods; dma->substream = substream; dma->period_bytes = period_bytes; From ad76ea373eb5f1232767242076ffef8b23518602 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 2 Aug 2018 14:04:45 +0200 Subject: [PATCH 342/519] ALSA: hda: Correct Asrock B85M-ITX power_save blacklist entry commit 8e82a728792bf66b9f0a29c9d4c4b0630f7b9c79 upstream. I added the subsys product-id for the HDMI HDA device rather then for the PCH one, this commit fixes this. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1525104 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d0b55c866370..cabccb10210e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2069,7 +2069,7 @@ static int azx_probe(struct pci_dev *pci, */ static struct snd_pci_quirk power_save_blacklist[] = { /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ - SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0), + SND_PCI_QUIRK(0x1849, 0xc892, "Asrock B85M-ITX", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ From dd65a18b208fb2e03824ecf044e41c74b74423c1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Jul 2018 11:01:04 +0200 Subject: [PATCH 343/519] ALSA: memalloc: Don't exceed over the requested size commit dfef01e150824b0e6da750cacda8958188d29aea upstream. snd_dma_alloc_pages_fallback() tries to allocate pages again when the allocation fails with reduced size. But the first try actually *increases* the size to power-of-two, which may give back a larger chunk than the requested size. This confuses the callers, e.g. sgbuf assumes that the size is equal or less, and it may result in a bad loop due to the underflow and eventually lead to Oops. The code of this function seems incorrectly assuming the usage of get_order(). We need to decrease at first, then align to power-of-two. Reported-and-tested-by: he, bo Reported-by: zhang jun Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/memalloc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index f05cb6a8cbe0..78ffe445d775 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -239,16 +239,12 @@ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size, int err; while ((err = snd_dma_alloc_pages(type, device, size, dmab)) < 0) { - size_t aligned_size; if (err != -ENOMEM) return err; if (size <= PAGE_SIZE) return -ENOMEM; - aligned_size = PAGE_SIZE << get_order(size); - if (size != aligned_size) - size = aligned_size; - else - size >>= 1; + size >>= 1; + size = PAGE_SIZE << get_order(size); } if (! dmab->area) return -ENOMEM; From 09b56641dddc92088bc2dd8d61ec229ce4bab8b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 17:11:38 +0200 Subject: [PATCH 344/519] ALSA: vxpocket: Fix invalid endian conversions commit 3acd3e3bab95ec3622ff98da313290ee823a0f68 upstream. The endian conversions used in vxp_dma_read() and vxp_dma_write() are superfluous and even wrong on big-endian machines, as inw() and outw() already do conversions. Kill them. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pcmcia/vx/vxp_ops.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pcmcia/vx/vxp_ops.c b/sound/pcmcia/vx/vxp_ops.c index 56aa1ba73ccc..49a883341eff 100644 --- a/sound/pcmcia/vx/vxp_ops.c +++ b/sound/pcmcia/vx/vxp_ops.c @@ -375,7 +375,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) { - outw(cpu_to_le16(*addr), port); + outw(*addr, port); addr++; } addr = (unsigned short *)runtime->dma_area; @@ -385,7 +385,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; count > 0; count--) { - outw(cpu_to_le16(*addr), port); + outw(*addr, port); addr++; } vx_release_pseudo_dma(chip); @@ -417,7 +417,7 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, length >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; length > 0; length--) - *addr++ = le16_to_cpu(inw(port)); + *addr++ = inw(port); addr = (unsigned short *)runtime->dma_area; pipe->hw_ptr = 0; } @@ -425,12 +425,12 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime, count >>= 1; /* in 16bit words */ /* Transfer using pseudo-dma. */ for (; count > 1; count--) - *addr++ = le16_to_cpu(inw(port)); + *addr++ = inw(port); /* Disable DMA */ pchip->regDIALOG &= ~VXP_DLG_DMAREAD_SEL_MASK; vx_outb(chip, DIALOG, pchip->regDIALOG); /* Read the last word (16 bits) */ - *addr = le16_to_cpu(inw(port)); + *addr = inw(port); /* Disable 16-bit accesses */ pchip->regDIALOG &= ~VXP_DLG_DMA16_SEL_MASK; vx_outb(chip, DIALOG, pchip->regDIALOG); From a8587cb96a79f90514b2c538498ed57541eb68ab Mon Sep 17 00:00:00 2001 From: John Ogness Date: Sun, 24 Jun 2018 00:32:11 +0200 Subject: [PATCH 345/519] USB: serial: sierra: fix potential deadlock at close commit e60870012e5a35b1506d7b376fddfb30e9da0b27 upstream. The portdata spinlock can be taken in interrupt context (via sierra_outdat_callback()). Disable interrupts when taking the portdata spinlock when discarding deferred URBs during close to prevent a possible deadlock. Fixes: 014333f77c0b ("USB: sierra: fix urb and memory leak on disconnect") Cc: stable Signed-off-by: John Ogness Signed-off-by: Sebastian Andrzej Siewior [ johan: amend commit message and add fixes and stable tags ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/sierra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 07d1ecd564f7..8960a46c83bb 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -790,9 +790,9 @@ static void sierra_close(struct usb_serial_port *port) kfree(urb->transfer_buffer); usb_free_urb(urb); usb_autopm_put_interface_async(serial->interface); - spin_lock(&portdata->lock); + spin_lock_irq(&portdata->lock); portdata->outstanding_urbs--; - spin_unlock(&portdata->lock); + spin_unlock_irq(&portdata->lock); } sierra_stop_rx_urbs(port); From 39cd328e84cbafa28a068e9a82e05dfb8b5c7512 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Tue, 24 Jul 2018 01:34:01 +0200 Subject: [PATCH 346/519] USB: option: add support for DW5821e commit 7bab01ecc6c43da882333c6db39741cb43677004 upstream. The device exposes AT, NMEA and DIAG ports in both USB configurations. The patch explicitly ignores interfaces 0 and 1, as they're bound to other drivers already; and also interface 6, which is a GNSS interface for which we don't have a driver yet. T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 18 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d982c455e18e..2b81939fecd7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -199,6 +199,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */ #define DELL_PRODUCT_5804_MINICARD_ATT 0x819b /* Novatel E371 */ +#define DELL_PRODUCT_5821E 0x81d7 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -1033,6 +1035,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E), + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, From 3f9ca472b2a7fc0e78b714fe1687c82206b20a8e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 16 Jan 2017 10:55:45 +0800 Subject: [PATCH 347/519] ACPI: save NVS memory for Lenovo G50-45 commit cbc00c1310d34139a63946482b40a6b261a03fb9 upstream. In commit 821d6f0359b0 (ACPI / sleep: Do not save NVS for new machines to accelerate S3), to optimize S3 suspend/resume speed, code is introduced to ignore NVS memory saving during S3 for all the platforms later than 2012. But, Lenovo G50-45, a platform released in 2015, still needs NVS memory saving during S3. A quirk is introduced for this platform. Link: https://bugzilla.kernel.org/show_bug.cgi?id=189431 Tested-by: Przemek Signed-off-by: Zhang Rui [ rjw: Drop unnecessary code ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e3322adaaae0..84ddae30fc34 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -124,6 +124,12 @@ void __init acpi_nvs_nosave_s3(void) nvs_nosave_s3 = true; } +static int __init init_nvs_save_s3(const struct dmi_system_id *d) +{ + nvs_nosave_s3 = false; + return 0; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -318,6 +324,19 @@ static struct dmi_system_id acpisleep_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"), }, }, + /* + * https://bugzilla.kernel.org/show_bug.cgi?id=189431 + * Lenovo G50-45 is a platform later than 2012, but needs nvs memory + * saving during S3. + */ + { + .callback = init_nvs_save_s3, + .ident = "Lenovo G50-45", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "80E3"), + }, + }, {}, }; From c2650d43a4dae0a6e678af42d8056a484784f29c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jul 2018 14:03:55 +0200 Subject: [PATCH 348/519] ACPI / PM: save NVS memory for ASUS 1025C laptop commit 231f9415001138a000cd0f881c46654b7ea3f8c5 upstream. Every time I tried to upgrade my laptop from 3.10.x to 4.x I faced an issue by which the fan would run at full speed upon resume. Bisecting it showed me the issue was introduced in 3.17 by commit 821d6f0359b0 (ACPI / sleep: Do not save NVS for new machines to accelerate S3). This code only affects machines built starting as of 2012, but this Asus 1025C laptop was made in 2012 and apparently needs the NVS data to be saved, otherwise the CPU's thermal state is not properly reported on resume and the fan runs at full speed upon resume. Here's a very simple way to check if such a machine is affected : # cat /sys/class/thermal/thermal_zone0/temp 55000 ( now suspend, wait one second and resume ) # cat /sys/class/thermal/thermal_zone0/temp 0 (and after ~15 seconds the fan starts to spin) Let's apply the same quirk as commit cbc00c13 (ACPI: save NVS memory for Lenovo G50-45) and reuse the function it provides. Note that this commit was already backported to 4.9.x but not 4.4.x. Cc: 3.17+ # 3.17+: requires cbc00c13 Signed-off-by: Willy Tarreau Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 84ddae30fc34..4f07029de209 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -324,6 +324,14 @@ static struct dmi_system_id acpisleep_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Asus 1025C", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "1025C"), + }, + }, /* * https://bugzilla.kernel.org/show_bug.cgi?id=189431 * Lenovo G50-45 is a platform later than 2012, but needs nvs memory From b4d2c57717fd0c905a4b8ebd5c534e770aeb93a2 Mon Sep 17 00:00:00 2001 From: Chen Hu Date: Fri, 27 Jul 2018 18:32:41 +0800 Subject: [PATCH 349/519] serial: 8250_dw: always set baud rate in dw8250_set_termios commit dfcab6ba573445c703235ab6c83758eec12d7f28 upstream. dw8250_set_termios() doesn't set baud rate if the arg "old ktermios" is NULL. This happens during resume. Call Trace: ... [ 54.928108] dw8250_set_termios+0x162/0x170 [ 54.928114] serial8250_set_termios+0x17/0x20 [ 54.928117] uart_change_speed+0x64/0x160 [ 54.928119] uart_resume_port ... So the baud rate is not restored after S3 and breaks the apps who use UART, for example, console and bluetooth etc. We address this issue by setting the baud rate irrespective of arg "old", just like the drivers for other 8250 IPs. This is tested with Intel Broxton platform. Signed-off-by: Chen Hu Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms") Cc: Heikki Krogerus Cc: stable Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 8435c3f204c1..a30d68c4b689 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -224,7 +224,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, unsigned int rate; int ret; - if (IS_ERR(d->clk) || !old) + if (IS_ERR(d->clk)) goto out; clk_disable_unprepare(d->clk); From 5069ddd8f9ace901f385fd42a75246e7a3727bc5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:06 -0500 Subject: [PATCH 350/519] x86/mm: Simplify p[g4um]d_page() macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fd7e315988b784509ba3f1b42f539bd0b1fca9bb upstream. Create a pgd_pfn() macro similar to the p[4um]d_pfn() macros and then use the p[g4um]d_pfn() macros in the p[g4um]d_page() macros instead of duplicating the code. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/e61eb533a6d0aac941db2723d8aa63ef6b882dee.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar [Backported to 4.9 stable by AK, suggested by Michael Hocko] Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4de6c282c02a..68a55273ce0f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -173,6 +173,11 @@ static inline unsigned long pud_pfn(pud_t pud) return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } +static inline unsigned long pgd_pfn(pgd_t pgd) +{ + return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) @@ -578,8 +583,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) \ - pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -647,8 +651,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pud_page(pud) \ - pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT) +#define pud_page(pud) pfn_to_page(pud_pfn(pud)) /* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) @@ -688,7 +691,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) +#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ static inline unsigned long pud_index(unsigned long address) From 9aeef6b667f2ffc4c5725ea05e180e62fb307a31 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 15 Jul 2018 20:36:50 +0100 Subject: [PATCH 351/519] Bluetooth: avoid killing an already killed socket commit 4e1a720d0312fd510699032c7694a362a010170f upstream. slub debug reported: [ 440.648642] ============================================================================= [ 440.648649] BUG kmalloc-1024 (Tainted: G BU O ): Poison overwritten [ 440.648651] ----------------------------------------------------------------------------- [ 440.648655] INFO: 0xe70f4bec-0xe70f4bec. First byte 0x6a instead of 0x6b [ 440.648665] INFO: Allocated in sk_prot_alloc+0x6b/0xc6 age=33155 cpu=1 pid=1047 [ 440.648671] ___slab_alloc.constprop.24+0x1fc/0x292 [ 440.648675] __slab_alloc.isra.18.constprop.23+0x1c/0x25 [ 440.648677] __kmalloc+0xb6/0x17f [ 440.648680] sk_prot_alloc+0x6b/0xc6 [ 440.648683] sk_alloc+0x1e/0xa1 [ 440.648700] sco_sock_alloc.constprop.6+0x26/0xaf [bluetooth] [ 440.648716] sco_connect_cfm+0x166/0x281 [bluetooth] [ 440.648731] hci_conn_request_evt.isra.53+0x258/0x281 [bluetooth] [ 440.648746] hci_event_packet+0x28b/0x2326 [bluetooth] [ 440.648759] hci_rx_work+0x161/0x291 [bluetooth] [ 440.648764] process_one_work+0x163/0x2b2 [ 440.648767] worker_thread+0x1a9/0x25c [ 440.648770] kthread+0xf8/0xfd [ 440.648774] ret_from_fork+0x2e/0x38 [ 440.648779] INFO: Freed in __sk_destruct+0xd3/0xdf age=3815 cpu=1 pid=1047 [ 440.648782] __slab_free+0x4b/0x27a [ 440.648784] kfree+0x12e/0x155 [ 440.648787] __sk_destruct+0xd3/0xdf [ 440.648790] sk_destruct+0x27/0x29 [ 440.648793] __sk_free+0x75/0x91 [ 440.648795] sk_free+0x1c/0x1e [ 440.648810] sco_sock_kill+0x5a/0x5f [bluetooth] [ 440.648825] sco_conn_del+0x8e/0xba [bluetooth] [ 440.648840] sco_disconn_cfm+0x3a/0x41 [bluetooth] [ 440.648855] hci_event_packet+0x45e/0x2326 [bluetooth] [ 440.648868] hci_rx_work+0x161/0x291 [bluetooth] [ 440.648872] process_one_work+0x163/0x2b2 [ 440.648875] worker_thread+0x1a9/0x25c [ 440.648877] kthread+0xf8/0xfd [ 440.648880] ret_from_fork+0x2e/0x38 [ 440.648884] INFO: Slab 0xf4718580 objects=27 used=27 fp=0x (null) flags=0x40008100 [ 440.648886] INFO: Object 0xe70f4b88 @offset=19336 fp=0xe70f54f8 When KASAN was enabled, it reported: [ 210.096613] ================================================================== [ 210.096634] BUG: KASAN: use-after-free in ex_handler_refcount+0x5b/0x127 [ 210.096641] Write of size 4 at addr ffff880107e17160 by task kworker/u9:1/2040 [ 210.096651] CPU: 1 PID: 2040 Comm: kworker/u9:1 Tainted: G U O 4.14.47-20180606+ #2 [ 210.096654] Hardware name: , BIOS 2017.01-00087-g43e04de 08/30/2017 [ 210.096693] Workqueue: hci0 hci_rx_work [bluetooth] [ 210.096698] Call Trace: [ 210.096711] dump_stack+0x46/0x59 [ 210.096722] print_address_description+0x6b/0x23b [ 210.096729] ? ex_handler_refcount+0x5b/0x127 [ 210.096736] kasan_report+0x220/0x246 [ 210.096744] ex_handler_refcount+0x5b/0x127 [ 210.096751] ? ex_handler_clear_fs+0x85/0x85 [ 210.096757] fixup_exception+0x8c/0x96 [ 210.096766] do_trap+0x66/0x2c1 [ 210.096773] do_error_trap+0x152/0x180 [ 210.096781] ? fixup_bug+0x78/0x78 [ 210.096817] ? hci_debugfs_create_conn+0x244/0x26a [bluetooth] [ 210.096824] ? __schedule+0x113b/0x1453 [ 210.096830] ? sysctl_net_exit+0xe/0xe [ 210.096837] ? __wake_up_common+0x343/0x343 [ 210.096843] ? insert_work+0x107/0x163 [ 210.096850] invalid_op+0x1b/0x40 [ 210.096888] RIP: 0010:hci_debugfs_create_conn+0x244/0x26a [bluetooth] [ 210.096892] RSP: 0018:ffff880094a0f970 EFLAGS: 00010296 [ 210.096898] RAX: 0000000000000000 RBX: ffff880107e170e8 RCX: ffff880107e17160 [ 210.096902] RDX: 000000000000002f RSI: ffff88013b80ed40 RDI: ffffffffa058b940 [ 210.096906] RBP: ffff88011b2b0578 R08: 00000000852f0ec9 R09: ffffffff81cfcf9b [ 210.096909] R10: 00000000d21bdad7 R11: 0000000000000001 R12: ffff8800967b0488 [ 210.096913] R13: ffff880107e17168 R14: 0000000000000068 R15: ffff8800949c0008 [ 210.096920] ? __sk_destruct+0x2c6/0x2d4 [ 210.096959] hci_event_packet+0xff5/0x7de2 [bluetooth] [ 210.096969] ? __local_bh_enable_ip+0x43/0x5b [ 210.097004] ? l2cap_sock_recv_cb+0x158/0x166 [bluetooth] [ 210.097039] ? hci_le_meta_evt+0x2bb3/0x2bb3 [bluetooth] [ 210.097075] ? l2cap_ertm_init+0x94e/0x94e [bluetooth] [ 210.097093] ? xhci_urb_enqueue+0xbd8/0xcf5 [xhci_hcd] [ 210.097102] ? __accumulate_pelt_segments+0x24/0x33 [ 210.097109] ? __accumulate_pelt_segments+0x24/0x33 [ 210.097115] ? __update_load_avg_se.isra.2+0x217/0x3a4 [ 210.097122] ? set_next_entity+0x7c3/0x12cd [ 210.097128] ? pick_next_entity+0x25e/0x26c [ 210.097135] ? pick_next_task_fair+0x2ca/0xc1a [ 210.097141] ? switch_mm_irqs_off+0x346/0xb4f [ 210.097147] ? __switch_to+0x769/0xbc4 [ 210.097153] ? compat_start_thread+0x66/0x66 [ 210.097188] ? hci_conn_check_link_mode+0x1cd/0x1cd [bluetooth] [ 210.097195] ? finish_task_switch+0x392/0x431 [ 210.097228] ? hci_rx_work+0x154/0x487 [bluetooth] [ 210.097260] hci_rx_work+0x154/0x487 [bluetooth] [ 210.097269] process_one_work+0x579/0x9e9 [ 210.097277] worker_thread+0x68f/0x804 [ 210.097285] kthread+0x31c/0x32b [ 210.097292] ? rescuer_thread+0x70c/0x70c [ 210.097299] ? kthread_create_on_node+0xa3/0xa3 [ 210.097306] ret_from_fork+0x35/0x40 [ 210.097314] Allocated by task 2040: [ 210.097323] kasan_kmalloc.part.1+0x51/0xc7 [ 210.097328] __kmalloc+0x17f/0x1b6 [ 210.097335] sk_prot_alloc+0xf2/0x1a3 [ 210.097340] sk_alloc+0x22/0x297 [ 210.097375] sco_sock_alloc.constprop.7+0x23/0x202 [bluetooth] [ 210.097410] sco_connect_cfm+0x2d0/0x566 [bluetooth] [ 210.097443] hci_conn_request_evt.isra.53+0x6d3/0x762 [bluetooth] [ 210.097476] hci_event_packet+0x85e/0x7de2 [bluetooth] [ 210.097507] hci_rx_work+0x154/0x487 [bluetooth] [ 210.097512] process_one_work+0x579/0x9e9 [ 210.097517] worker_thread+0x68f/0x804 [ 210.097523] kthread+0x31c/0x32b [ 210.097529] ret_from_fork+0x35/0x40 [ 210.097533] Freed by task 2040: [ 210.097539] kasan_slab_free+0xb3/0x15e [ 210.097544] kfree+0x103/0x1a9 [ 210.097549] __sk_destruct+0x2c6/0x2d4 [ 210.097584] sco_conn_del.isra.1+0xba/0x10e [bluetooth] [ 210.097617] hci_event_packet+0xff5/0x7de2 [bluetooth] [ 210.097648] hci_rx_work+0x154/0x487 [bluetooth] [ 210.097653] process_one_work+0x579/0x9e9 [ 210.097658] worker_thread+0x68f/0x804 [ 210.097663] kthread+0x31c/0x32b [ 210.097670] ret_from_fork+0x35/0x40 [ 210.097676] The buggy address belongs to the object at ffff880107e170e8 which belongs to the cache kmalloc-1024 of size 1024 [ 210.097681] The buggy address is located 120 bytes inside of 1024-byte region [ffff880107e170e8, ffff880107e174e8) [ 210.097683] The buggy address belongs to the page: [ 210.097689] page:ffffea00041f8400 count:1 mapcount:0 mapping: (null) index:0xffff880107e15b68 compound_mapcount: 0 [ 210.110194] flags: 0x8000000000008100(slab|head) [ 210.115441] raw: 8000000000008100 0000000000000000 ffff880107e15b68 0000000100170016 [ 210.115448] raw: ffffea0004a47620 ffffea0004b48e20 ffff88013b80ed40 0000000000000000 [ 210.115451] page dumped because: kasan: bad access detected [ 210.115454] Memory state around the buggy address: [ 210.115460] ffff880107e17000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 210.115465] ffff880107e17080: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb [ 210.115469] >ffff880107e17100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 210.115472] ^ [ 210.115477] ffff880107e17180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 210.115481] ffff880107e17200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 210.115483] ================================================================== And finally when BT_DBG() and ftrace was enabled it showed: <...>-14979 [001] .... 186.104191: sco_sock_kill <-sco_sock_close <...>-14979 [001] .... 186.104191: sco_sock_kill <-sco_sock_release <...>-14979 [001] .... 186.104192: sco_sock_kill: sk ef0497a0 state 9 <...>-14979 [001] .... 186.104193: bt_sock_unlink <-sco_sock_kill kworker/u9:2-792 [001] .... 186.104246: sco_sock_kill <-sco_conn_del kworker/u9:2-792 [001] .... 186.104248: sco_sock_kill: sk ef0497a0 state 9 kworker/u9:2-792 [001] .... 186.104249: bt_sock_unlink <-sco_sock_kill kworker/u9:2-792 [001] .... 186.104250: sco_sock_destruct <-__sk_destruct kworker/u9:2-792 [001] .... 186.104250: sco_sock_destruct: sk ef0497a0 kworker/u9:2-792 [001] .... 186.104860: hci_conn_del <-hci_event_packet kworker/u9:2-792 [001] .... 186.104864: hci_conn_del: hci0 hcon ef0484c0 handle 266 Only in the failed case, sco_sock_kill() gets called with the same sock pointer two times. Add a check for SOCK_DEAD to avoid continue killing a socket which has already been killed. Signed-off-by: Sudip Mukherjee Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/sco.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f52bcbf2e58c..2209fd2ff2e3 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -392,7 +392,8 @@ static void sco_sock_cleanup_listen(struct sock *parent) */ static void sco_sock_kill(struct sock *sk) { - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket || + sock_flag(sk, SOCK_DEAD)) return; BT_DBG("sk %p state %d", sk, sk->sk_state); From 3b6393e30ec95bb5044cb4506ac468fb08ccf647 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 15 Aug 2018 12:14:05 -0700 Subject: [PATCH 352/519] isdn: Disable IIOCDBGVAR [ Upstream commit 5e22002aa8809e2efab2da95855f73f63e14a36c ] It was possible to directly leak the kernel address where the isdn_dev structure pointer was stored. This is a kernel ASLR bypass for anyone with access to the ioctl. The code had been present since the beginning of git history, though this shouldn't ever be needed for normal operation, therefore remove it. Reported-by: Al Viro Cc: Karsten Keil Signed-off-by: Kees Cook Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_common.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index e4c43a17b333..8088c34336aa 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -1655,13 +1655,7 @@ isdn_ioctl(struct file *file, uint cmd, ulong arg) } else return -EINVAL; case IIOCDBGVAR: - if (arg) { - if (copy_to_user(argp, &dev, sizeof(ulong))) - return -EFAULT; - return 0; - } else - return -EINVAL; - break; + return -EINVAL; default: if ((cmd & IIOCDRVCTL) == IIOCDRVCTL) cmd = ((cmd >> _IOC_NRSHIFT) & _IOC_NRMASK) & ISDN_DRVIOCTL_MASK; From 78f654f6cce3442937b8c7eb4b640357871363c1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Aug 2018 07:48:38 +0200 Subject: [PATCH 353/519] Linux 4.4.151 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7789195c6a59..04199cf99dd5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 150 +SUBLEVEL = 151 EXTRAVERSION = NAME = Blurry Fish Butt From 97aaf73394cce0cf85c393c2a5e1d9ad6ddb2245 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 1 Jun 2018 14:34:33 +0300 Subject: [PATCH 354/519] ARC: Explicitly add -mmedium-calls to CFLAGS [ Upstream commit 74c11e300c103af47db5b658fdcf28002421e250 ] GCC built for arc*-*-linux has "-mmedium-calls" implicitly enabled by default thus we don't see any problems during Linux kernel compilation. ----------------------------->8------------------------ arc-linux-gcc -mcpu=arc700 -Q --help=target | grep calls -mlong-calls [disabled] -mmedium-calls [enabled] ----------------------------->8------------------------ But if we try to use so-called Elf32 toolchain with GCC configured for arc*-*-elf* then we'd see the following failure: ----------------------------->8------------------------ init/do_mounts.o: In function 'init_rootfs': do_mounts.c:(.init.text+0x108): relocation truncated to fit: R_ARC_S21W_PCREL against symbol 'unregister_filesystem' defined in .text section in fs/filesystems.o arc-elf32-ld: final link failed: Symbol needs debug section which does not exist make: *** [vmlinux] Error 1 ----------------------------->8------------------------ That happens because neither "-mmedium-calls" nor "-mlong-calls" are enabled in Elf32 GCC: ----------------------------->8------------------------ arc-elf32-gcc -mcpu=arc700 -Q --help=target | grep calls -mlong-calls [disabled] -mmedium-calls [disabled] ----------------------------->8------------------------ Now to make it possible to use Elf32 toolchain for building Linux kernel we're explicitly add "-mmedium-calls" to CFLAGS. And since we add "-mmedium-calls" to the global CFLAGS there's no point in having per-file copies thus removing them. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/Makefile | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index c05ea2b54276..b9f7306412e5 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -14,7 +14,7 @@ endif KBUILD_DEFCONFIG := nsim_700_defconfig -cflags-y += -fno-common -pipe -fno-builtin -D__linux__ +cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs @@ -137,16 +137,3 @@ dtbs: scripts archclean: $(Q)$(MAKE) $(clean)=$(boot) - -# Hacks to enable final link due to absence of link-time branch relexation -# and gcc choosing optimal(shorter) branches at -O3 -# -# vineetg Feb 2010: -mlong-calls switched off for overall kernel build -# However lib/decompress_inflate.o (.init.text) calls -# zlib_inflate_workspacesize (.text) causing relocation errors. -# Thus forcing all exten calls in this file to be long calls -export CFLAGS_decompress_inflate.o = -mmedium-calls -export CFLAGS_initramfs.o = -mmedium-calls -ifdef CONFIG_SMP -export CFLAGS_core.o = -mmedium-calls -endif From 8747d9e7d45420350524dc8a8343837b8ac92776 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 10:11:56 -0700 Subject: [PATCH 355/519] netfilter: ipv6: nf_defrag: reduce struct net memory waste [ Upstream commit 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 ] It is a waste of memory to use a full "struct netns_sysctl_ipv6" while only one pointer is really used, considering netns_sysctl_ipv6 keeps growing. Also, since "struct netns_frags" has cache line alignment, it is better to move the frags_hdr pointer outside, otherwise we spend a full cache line for this pointer. This saves 192 bytes of memory per netns. Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/net_namespace.h | 1 + include/net/netns/ipv6.h | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 93328c61934a..6965dfe7e88b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -115,6 +115,7 @@ struct net { #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; + struct ctl_table_header *nf_frag_frags_hdr; #endif struct sock *nfnl; struct sock *nfnl_stash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c0368db6df54..d235722c0d92 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -86,7 +86,6 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_sysctl_ipv6 sysctl; struct netns_frags frags; }; #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index eb2dc39f7066..838b65a59a73 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -118,7 +118,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->nf_frag.sysctl.frags_hdr = hdr; + net->nf_frag_frags_hdr = hdr; return 0; err_reg: @@ -132,8 +132,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { struct ctl_table *table; - table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + table = net->nf_frag_frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag_frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } From c22a9ad9d0dfa4a72aeb08bc5275239d4281f077 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 16:46:03 -0600 Subject: [PATCH 356/519] selftests: pstore: return Kselftest Skip code for skipped tests [ Upstream commit 856e7c4b619af622d56b3b454f7bec32a170ac99 ] When pstore_post_reboot test gets skipped because of unmet dependencies and/or unsupported configuration, it returns 0 which is treated as a pass by the Kselftest framework. This leads to false positive result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Kees Cook Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/pstore/pstore_post_reboot_tests | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests index 6ccb154cb4aa..22f8df1ad7d4 100755 --- a/tools/testing/selftests/pstore/pstore_post_reboot_tests +++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests @@ -7,13 +7,16 @@ # # Released under the terms of the GPL v2. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./common_tests if [ -e $REBOOT_FLAG ]; then rm $REBOOT_FLAG else prlog "pstore_crash_test has not been executed yet. we skip further tests." - exit 0 + exit $ksft_skip fi prlog -n "Mounting pstore filesystem ... " From 68f5b8ff63022b1d6b7019092c66c916a499dc84 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 17:40:31 -0600 Subject: [PATCH 357/519] selftests: static_keys: return Kselftest Skip code for skipped tests [ Upstream commit 8781578087b8fb8829558bac96c3c24e5ba26f82 ] When static_keys test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Added an explicit searches for test_static_key_base and test_static_keys modules and return skip code if they aren't found to differentiate between the failure to load the module condition and module not found condition. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../selftests/static_keys/test_static_keys.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh index 1261e3fa1e3a..5bba7796fb34 100755 --- a/tools/testing/selftests/static_keys/test_static_keys.sh +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -1,6 +1,19 @@ #!/bin/sh # Runs static keys kernel module tests +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_static_key_base; then + echo "static_key: module test_static_key_base is not found [SKIP]" + exit $ksft_skip +fi + +if ! /sbin/modprobe -q -n test_static_keys; then + echo "static_key: module test_static_keys is not found [SKIP]" + exit $ksft_skip +fi + if /sbin/modprobe -q test_static_key_base; then if /sbin/modprobe -q test_static_keys; then echo "static_key: ok" From 01024c01171b1c5c18f1269c124886a273230db4 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 13 Jun 2018 21:10:48 -0600 Subject: [PATCH 358/519] selftests: user: return Kselftest Skip code for skipped tests [ Upstream commit d7d5311d4aa9611fe1a5a851e6f75733237a668a ] When user test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Add an explicit check for module presence and return skip code if module isn't present. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/user/test_user_copy.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh index 350107f40c1d..0409270f998c 100755 --- a/tools/testing/selftests/user/test_user_copy.sh +++ b/tools/testing/selftests/user/test_user_copy.sh @@ -1,6 +1,13 @@ #!/bin/sh # Runs copy_to/from_user infrastructure using test_user_copy kernel module +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_user_copy; then + echo "user: module test_user_copy is not found [SKIP]" + exit $ksft_skip +fi if /sbin/modprobe -q test_user_copy; then /sbin/modprobe -q -r test_user_copy echo "user_copy: ok" From 8e54d87ffc5170acfe72b371198ddcb685f88e63 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Thu, 14 Jun 2018 16:56:13 -0600 Subject: [PATCH 359/519] selftests: zram: return Kselftest Skip code for skipped tests [ Upstream commit 685814466bf8398192cf855415a0bb2cefc1930e ] When zram test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/zram/zram.sh | 5 ++++- tools/testing/selftests/zram/zram_lib.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 683a292e3290..9399c4aeaa26 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -1,6 +1,9 @@ #!/bin/bash TCID="zram.sh" +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./zram_lib.sh run_zram () { @@ -23,5 +26,5 @@ elif [ -b /dev/zram0 ]; then else echo "$TCID : No zram.ko module or /dev/zram0 device file not found" echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + exit $ksft_skip fi diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f6a9c73e7a44..9e73a4fb9b0a 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -18,6 +18,9 @@ MODULE=0 dev_makeswap=-1 dev_mounted=-1 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + trap INT check_prereqs() @@ -27,7 +30,7 @@ check_prereqs() if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi } From 67726a2e9cd7e16503e8d54c54b5885a8737663d Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Thu, 14 Jun 2018 11:57:08 +0200 Subject: [PATCH 360/519] selftests: sync: add config fragment for testing sync framework [ Upstream commit d6a3e55131fcb1e5ca1753f4b6f297a177b2fc91 ] Unless the software synchronization objects (CONFIG_SW_SYNC) is enabled, the sync test will be skipped: TAP version 13 1..0 # Skipped: Sync framework not supported by kernel Add a config fragment file to be able to run "make kselftest-merge" to enable relevant configuration required in order to run the sync test. Signed-off-by: Fathi Boudra Link: https://lkml.org/lkml/2017/5/5/14 Signed-off-by: Anders Roxell Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/sync/config | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tools/testing/selftests/sync/config diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config new file mode 100644 index 000000000000..1ab7e8130db2 --- /dev/null +++ b/tools/testing/selftests/sync/config @@ -0,0 +1,4 @@ +CONFIG_STAGING=y +CONFIG_ANDROID=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y From 0a6914e81cfaf4b77ba6ebeb09f87dab4ad2336c Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:27 -0700 Subject: [PATCH 361/519] ARM: dts: Cygnus: Fix I2C controller interrupt type [ Upstream commit 71ca3409703b62b6a092d0d9d13f366c121bc5d3 ] Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Cygnus SoC. Fixes: b51c05a331ff ("ARM: dts: add I2C device nodes for Broadcom Cygnus") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-cygnus.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 2778533502d9..5ce200860c89 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -110,7 +110,7 @@ i2c0: i2c@18008000 { reg = <0x18008000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -138,7 +138,7 @@ i2c1: i2c@1800b000 { reg = <0x1800b000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; From 879384b1fc452c7de5f3c027c7530f0379a8a3a2 Mon Sep 17 00:00:00 2001 From: William Wu Date: Fri, 11 May 2018 17:46:32 +0800 Subject: [PATCH 362/519] usb: dwc2: fix isoc split in transfer with no data [ Upstream commit 70c3c8cb83856758025c2a211dd022bc0478922a ] If isoc split in transfer with no data (the length of DATA0 packet is zero), we can't simply return immediately. Because the DATA0 can be the first transaction or the second transaction for the isoc split in transaction. If the DATA0 packet with no data is in the first transaction, we can return immediately. But if the DATA0 packet with no data is in the second transaction of isoc split in transaction sequence, we need to increase the qtd->isoc_frame_index and giveback urb to device driver if needed, otherwise, the MDATA packet will be lost. A typical test case is that connect the dwc2 controller with an usb hs Hub (GL852G-12), and plug an usb fs audio device (Plantronics headset) into the downstream port of Hub. Then use the usb mic to record, we can find noise when playback. In the case, the isoc split in transaction sequence like this: - SSPLIT IN transaction - CSPLIT IN transaction - MDATA packet (176 bytes) - CSPLIT IN transaction - DATA0 packet (0 byte) This patch use both the length of DATA0 and qtd->isoc_split_offset to check if the DATA0 is in the second transaction. Tested-by: Gevorg Sahakyan Tested-by: Heiko Stuebner Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_intr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index bda0b21b850f..51866f3f2052 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -931,9 +931,8 @@ static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg, frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index]; len = dwc2_get_actual_xfer_length(hsotg, chan, chnum, qtd, DWC2_HC_XFER_COMPLETE, NULL); - if (!len) { + if (!len && !qtd->isoc_split_offset) { qtd->complete_split = 0; - qtd->isoc_split_offset = 0; return 0; } From 3c29ae7ce74b4a3fa9719c4ccc244b59c0906a65 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 25 May 2018 17:24:57 +0800 Subject: [PATCH 363/519] usb: gadget: composite: fix delayed_status race condition when set_interface [ Upstream commit 980900d6318066b9f8314bfb87329a20fd0d1ca4 ] It happens when enable debug log, if set_alt() returns USB_GADGET_DELAYED_STATUS and usb_composite_setup_continue() is called before increasing count of @delayed_status, so fix it by using spinlock of @cdev->lock. Signed-off-by: Chunfeng Yun Tested-by: Jay Hsu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index eb445c2ab15e..58f5fbdb6959 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1619,6 +1619,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) */ if (w_value && !f->get_alt) break; + + spin_lock(&cdev->lock); value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { DBG(cdev, @@ -1628,6 +1630,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) DBG(cdev, "delayed_status count %d\n", cdev->delayed_status); } + spin_unlock(&cdev->lock); break; case USB_REQ_GET_INTERFACE: if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) From d51ce9e03e6341a036b44e38d91adaf48c82ec9b Mon Sep 17 00:00:00 2001 From: Grigor Tovmasyan Date: Thu, 24 May 2018 18:22:30 +0400 Subject: [PATCH 364/519] usb: gadget: dwc2: fix memory leak in gadget_init() [ Upstream commit 9bb073a053f0464ea74a4d4c331fdb7da58568d6 ] Freed allocated request for ep0 to prevent memory leak in case when dwc2_driver_probe() failed. Cc: Stefan Wahren Cc: Marek Szyprowski Tested-by: Stefan Wahren Tested-by: Marek Szyprowski Acked-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 98705b83d2dc..842c1ae7a291 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3657,9 +3657,11 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) } ret = usb_add_gadget_udc(dev, &hsotg->gadget); - if (ret) + if (ret) { + dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, + hsotg->ctrl_req); return ret; - + } dwc2_hsotg_dump(hsotg); return 0; @@ -3672,6 +3674,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg) { usb_del_gadget_udc(&hsotg->gadget); + dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, hsotg->ctrl_req); return 0; } From 7d082f109ec260ad0f13b9e77bd41b38d0c34326 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Sat, 16 Jun 2018 01:05:01 +0800 Subject: [PATCH 365/519] scsi: xen-scsifront: add error handling for xenbus_printf [ Upstream commit 93efbd39870474cc536b9caf4a6efeb03b0bc56f ] When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/xen-scsifront.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 9dc8687bf048..e1b32ed0aa20 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -676,10 +676,17 @@ static int scsifront_dev_reset_handler(struct scsi_cmnd *sc) static int scsifront_sdev_configure(struct scsi_device *sdev) { struct vscsifrnt_info *info = shost_priv(sdev->host); + int err; - if (info && current == info->curr) - xenbus_printf(XBT_NIL, info->dev->nodename, + if (info && current == info->curr) { + err = xenbus_printf(XBT_NIL, info->dev->nodename, info->dev_state_path, "%d", XenbusStateConnected); + if (err) { + xenbus_dev_error(info->dev, err, + "%s: writing dev_state_path", __func__); + return err; + } + } return 0; } @@ -687,10 +694,15 @@ static int scsifront_sdev_configure(struct scsi_device *sdev) static void scsifront_sdev_destroy(struct scsi_device *sdev) { struct vscsifrnt_info *info = shost_priv(sdev->host); + int err; - if (info && current == info->curr) - xenbus_printf(XBT_NIL, info->dev->nodename, + if (info && current == info->curr) { + err = xenbus_printf(XBT_NIL, info->dev->nodename, info->dev_state_path, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing dev_state_path", __func__); + } } static struct scsi_host_template scsifront_sht = { @@ -1025,9 +1037,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) if (scsi_add_device(info->host, chn, tgt, lun)) { dev_err(&dev->dev, "scsi_add_device\n"); - xenbus_printf(XBT_NIL, dev->nodename, + err = xenbus_printf(XBT_NIL, dev->nodename, info->dev_state_path, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(dev, err, + "%s: writing dev_state_path", __func__); } break; case VSCSIFRONT_OP_DEL_LUN: @@ -1041,10 +1056,14 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) } break; case VSCSIFRONT_OP_READD_LUN: - if (device_state == XenbusStateConnected) - xenbus_printf(XBT_NIL, dev->nodename, + if (device_state == XenbusStateConnected) { + err = xenbus_printf(XBT_NIL, dev->nodename, info->dev_state_path, "%d", XenbusStateConnected); + if (err) + xenbus_dev_error(dev, err, + "%s: writing dev_state_path", __func__); + } break; default: break; From d5a362036c85990648c726716f4a569a504b89fb Mon Sep 17 00:00:00 2001 From: Zhizhou Zhang Date: Tue, 12 Jun 2018 17:07:37 +0800 Subject: [PATCH 366/519] arm64: make secondary_start_kernel() notrace [ Upstream commit b154886f7892499d0d3054026e19dfb9a731df61 ] We can't call function trace hook before setup percpu offset. When entering secondary_start_kernel(), percpu offset has not been initialized. So this lead hotplug malfunction. Here is the flow to reproduce this bug: echo 0 > /sys/devices/system/cpu/cpu1/online echo function > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_on echo 1 > /sys/devices/system/cpu/cpu1/online Acked-by: Mark Rutland Tested-by: Suzuki K Poulose Signed-off-by: Zhizhou Zhang Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f3c3d8fee5ba..03c0946b79d2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -131,7 +131,7 @@ static void smp_store_cpu_info(unsigned int cpuid) * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void secondary_start_kernel(void) +asmlinkage notrace void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); From f0e199f330ea7d059713be165edcf86a3f725b3e Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:01 -0700 Subject: [PATCH 367/519] qed: Add sanity check for SIMD fastpath handler. [ Upstream commit 3935a70968820c3994db4de7e6e1c7e814bff875 ] Avoid calling a SIMD fastpath handler if it is NULL. The check is needed to handle an unlikely scenario where unsolicited interrupt is destined to a PF in INTa mode. Fixes: fe56b9e6a ("qed: Add module with basic common support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 688b6da5a9bb..35e1468d8196 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -461,8 +461,16 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance) /* Fastpath interrupts */ for (j = 0; j < 64; j++) { if ((0x2ULL << j) & status) { - hwfn->simd_proto_handler[j].func( - hwfn->simd_proto_handler[j].token); + struct qed_simd_fp_handler *p_handler = + &hwfn->simd_proto_handler[j]; + + if (p_handler->func) + p_handler->func(p_handler->token); + else + DP_NOTICE(hwfn, + "Not calling fastpath handler as it is NULL [handler #%d, status 0x%llx]\n", + j, status); + status &= ~(0x2ULL << j); rc = IRQ_HANDLED; } From d89657e052abc1ef49acd1bab3d66bfe3efd25e1 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Tue, 19 Jun 2018 08:15:24 -0700 Subject: [PATCH 368/519] enic: initialize enic->rfs_h.lock in enic_probe [ Upstream commit 3256d29fc7aecdf99feb1cb9475ed2252769a8a7 ] lockdep spotted that we are using rfs_h.lock in enic_get_rxnfc() without initializing. rfs_h.lock is initialized in enic_open(). But ethtool_ops can be called when interface is down. Move enic_rfs_flw_tbl_init to enic_probe. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 18 PID: 1189 Comm: ethtool Not tainted 4.17.0-rc7-devel+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: dump_stack+0x85/0xc0 register_lock_class+0x550/0x560 ? __handle_mm_fault+0xa8b/0x1100 __lock_acquire+0x81/0x670 lock_acquire+0xb9/0x1e0 ? enic_get_rxnfc+0x139/0x2b0 [enic] _raw_spin_lock_bh+0x38/0x80 ? enic_get_rxnfc+0x139/0x2b0 [enic] enic_get_rxnfc+0x139/0x2b0 [enic] ethtool_get_rxnfc+0x8d/0x1c0 dev_ethtool+0x16c8/0x2400 ? __mutex_lock+0x64d/0xa00 ? dev_load+0x6a/0x150 dev_ioctl+0x253/0x4b0 sock_do_ioctl+0x9a/0x130 sock_ioctl+0x1af/0x350 do_vfs_ioctl+0x8e/0x670 ? syscall_trace_enter+0x1e2/0x380 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5a/0x170 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_clsf.c | 3 +-- drivers/net/ethernet/cisco/enic/enic_main.c | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 3c677ed3c29e..4d9014d5b36d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -78,7 +78,6 @@ void enic_rfs_flw_tbl_init(struct enic *enic) enic->rfs_h.max = enic->config.num_arfs; enic->rfs_h.free = enic->rfs_h.max; enic->rfs_h.toclean = 0; - enic_rfs_timer_start(enic); } void enic_rfs_flw_tbl_free(struct enic *enic) @@ -87,7 +86,6 @@ void enic_rfs_flw_tbl_free(struct enic *enic) enic_rfs_timer_stop(enic); spin_lock_bh(&enic->rfs_h.lock); - enic->rfs_h.free = 0; for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) { struct hlist_head *hhead; struct hlist_node *tmp; @@ -98,6 +96,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic) enic_delfltr(enic, n->fltr_id); hlist_del(&n->node); kfree(n); + enic->rfs_h.free++; } } spin_unlock_bh(&enic->rfs_h.lock); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 029fa5bee520..8390597aecb8 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1760,7 +1760,7 @@ static int enic_open(struct net_device *netdev) vnic_intr_unmask(&enic->intr[i]); enic_notify_timer_start(enic); - enic_rfs_flw_tbl_init(enic); + enic_rfs_timer_start(enic); return 0; @@ -2694,6 +2694,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) enic->notify_timer.function = enic_notify_timer; enic->notify_timer.data = (unsigned long)enic; + enic_rfs_flw_tbl_init(enic); enic_set_rx_coal_setting(enic); INIT_WORK(&enic->reset, enic_reset); INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); From e5bfd8e707375eaa67c775e69ded5b8f104a80db Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 23:40:53 +0200 Subject: [PATCH 369/519] net: hamradio: use eth_broadcast_addr [ Upstream commit 4e8439aa34802deab11cee68b0ecb18f887fb153 ] The array bpq_eth_addr is only used to get the size of an address, whereas the bcast_addr is used to set the broadcast address. This leads to a warning when using clang: drivers/net/hamradio/bpqether.c:94:13: warning: variable 'bpq_eth_addr' is not needed and will not be emitted [-Wunneeded-internal-declaration] static char bpq_eth_addr[6]; ^ Remove both variables and use the common eth_broadcast_addr to set the broadcast address. Signed-off-by: Stefan Agner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/bpqether.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d95a50ae996d..8748e8c9ce96 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -89,10 +89,6 @@ static const char banner[] __initconst = KERN_INFO \ "AX.25: bpqether driver version 004\n"; -static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; - -static char bpq_eth_addr[6]; - static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); @@ -515,8 +511,8 @@ static int bpq_new_device(struct net_device *edev) bpq->ethdev = edev; bpq->axdev = ndev; - memcpy(bpq->dest_addr, bcast_addr, sizeof(bpq_eth_addr)); - memcpy(bpq->acpt_addr, bcast_addr, sizeof(bpq_eth_addr)); + eth_broadcast_addr(bpq->dest_addr); + eth_broadcast_addr(bpq->acpt_addr); err = register_netdevice(ndev); if (err) From c5df06baac86afb3dfd69d7b865cf9e5d13c81f4 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Jun 2018 17:23:17 +0800 Subject: [PATCH 370/519] net: propagate dev_get_valid_name return code [ Upstream commit 7892bd081045222b9e4027fec279a28d6fe7aa66 ] if dev_get_valid_name failed, propagate its return code and remove the setting err to ENODEV, it will be set to 0 again before dev_change_net_namespace exits. Signed-off-by: Li RongQing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 3bcbf931a910..191cf880d805 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7420,7 +7420,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(net, dev, pat) < 0) + err = dev_get_valid_name(net, dev, pat); + if (err < 0) goto out; } @@ -7432,7 +7433,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_close(dev); /* And unlink it from device chain */ - err = -ENODEV; unlist_netdevice(dev); synchronize_net(); From d68db3d9974fdc31664a46bbd12a98915eeec1f6 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 29 Nov 2017 11:21:45 +0300 Subject: [PATCH 371/519] ARC: Enable machine_desc->init_per_cpu for !CONFIG_SMP [ Upstream commit 2f24ef7413a4d91657ef04e77c27ce0b313e6c95 ] machine_desc->init_per_cpu() hook is supposed to be per cpu initialization and would seem to apply equally to UP and/or SMP. Infact the comment in header file seems to suggest it works for UP too, which was not the case and this patch. This enables !CONFIG_SMP build for platforms such as hsdk. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: trimmeed changelog] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/mach_desc.h | 2 -- arch/arc/kernel/irq.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index c28e6c347b49..871f3cb16af9 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -34,9 +34,7 @@ struct machine_desc { const char *name; const char **dt_compat; void (*init_early)(void); -#ifdef CONFIG_SMP void (*init_per_cpu)(unsigned int); -#endif void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index ba17f85285cf..dd42c6feaba5 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -31,10 +31,10 @@ void __init init_IRQ(void) /* a SMP H/w block could do IPI IRQ request here */ if (plat_smp_ops.init_per_cpu) plat_smp_ops.init_per_cpu(smp_processor_id()); +#endif if (machine_desc->init_per_cpu) machine_desc->init_per_cpu(smp_processor_id()); -#endif } /* From 39565a7d63fb3ccf98d5745499760c1000012599 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 20 Jun 2018 10:03:56 +0200 Subject: [PATCH 372/519] net: davinci_emac: match the mdio device against its compatible if possible [ Upstream commit ea0820bb771175c7d4192fc6f5b5c56b3c6d5239 ] Device tree based systems without of_dev_auxdata will have the mdio device named differently than "davinci_mdio(.0)". In this case use the device's parent's compatible string for matching Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/davinci_emac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 33bd3b902304..6be315303d61 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1517,6 +1517,10 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) static int match_first_device(struct device *dev, void *data) { + if (dev->parent && dev->parent->of_node) + return of_device_is_compatible(dev->parent->of_node, + "ti,davinci_mdio"); + return !strncmp(dev_name(dev), "davinci_mdio", 12); } From c40dc96f7f7e29d7c1e520c46c87178c2d4b1dcc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 4 Apr 2018 14:06:30 -0400 Subject: [PATCH 373/519] locking/lockdep: Do not record IRQ state within lockdep code [ Upstream commit fcc784be837714a9173b372ff9fb9b514590dad9 ] While debugging where things were going wrong with mapping enabling/disabling interrupts with the lockdep state and actual real enabling and disabling interrupts, I had to silent the IRQ disabling/enabling in debug_check_no_locks_freed() because it was always showing up as it was called before the splat was. Use raw_local_irq_save/restore() for not only debug_check_no_locks_freed() but for all internal lockdep functions, as they hide useful information about where interrupts were used incorrectly last. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/lkml/20180404140630.3f4f4c7a@gandalf.local.home Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 0e2c4911ba61..6e171b547a80 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1264,11 +1264,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) this.parent = NULL; this.class = class; - local_irq_save(flags); + raw_local_irq_save(flags); arch_spin_lock(&lockdep_lock); ret = __lockdep_count_forward_deps(&this); arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -1291,11 +1291,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) this.parent = NULL; this.class = class; - local_irq_save(flags); + raw_local_irq_save(flags); arch_spin_lock(&lockdep_lock); ret = __lockdep_count_backward_deps(&this); arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -4123,7 +4123,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) if (unlikely(!debug_locks)) return; - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < curr->lockdep_depth; i++) { hlock = curr->held_locks + i; @@ -4134,7 +4134,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); From 55989af9dfa0ce11ef9694899e649fdf38b9aab2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 21 Jun 2018 19:49:36 +0800 Subject: [PATCH 374/519] ipv6: mcast: fix unsolicited report interval after receiving querys [ Upstream commit 6c6da92808442908287fae8ebb0ca041a52469f4 ] After recieving MLD querys, we update idev->mc_maxdelay with max_delay from query header. This make the later unsolicited reports have the same interval with mc_maxdelay, which means we may send unsolicited reports with long interval time instead of default configured interval time. Also as we will not call ipv6_mc_reset() after device up. This issue will be there even after leave the group and join other groups. Fixes: fc4eba58b4c14 ("ipv6: make unsolicited report intervals configurable for mld") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 06640685ff43..091cee551cd9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2061,7 +2061,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev) mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, idev->mc_maxdelay); + mld_dad_start_timer(idev, + unsolicited_report_interval(idev)); } } @@ -2073,7 +2074,8 @@ static void mld_dad_timer_expire(unsigned long data) if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, idev->mc_maxdelay); + mld_dad_start_timer(idev, + unsolicited_report_interval(idev)); } in6_dev_put(idev); } @@ -2431,7 +2433,8 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) { idev->mc_ifc_count--; if (idev->mc_ifc_count) - mld_ifc_start_timer(idev, idev->mc_maxdelay); + mld_ifc_start_timer(idev, + unsolicited_report_interval(idev)); } in6_dev_put(idev); } From 091cb7dec89b1bbd4989a30f5ae9ab93248d2a93 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 22 Jun 2018 10:54:45 -0700 Subject: [PATCH 375/519] Smack: Mark inode instant in smack_task_to_inode [ Upstream commit 7b4e88434c4e7982fb053c49657e1c8bbb8692d9 ] Smack: Mark inode instant in smack_task_to_inode /proc clean-up in commit 1bbc55131e59bd099fdc568d3aa0b42634dbd188 resulted in smack_task_to_inode() being called before smack_d_instantiate. This resulted in the smk_inode value being ignored, even while present for files in /proc/self. Marking the inode as instant here fixes that. Signed-off-by: Casey Schaufler Signed-off-by: James Morris Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- security/smack/smack_lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 735a1a9386d6..c73361859d11 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2260,6 +2260,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) struct smack_known *skp = smk_of_task_struct(p); isp->smk_inode = skp; + isp->smk_flags |= SMK_INODE_INSTANT; } /* From 86c43df5bfc34c5dd910b9257f1d5cc03d367e15 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Sat, 23 Jun 2018 20:28:26 +0530 Subject: [PATCH 376/519] cxgb4: when disabling dcb set txq dcb priority to 0 [ Upstream commit 5ce36338a30f9814fc4824f9fe6c20cd83d872c7 ] When we are disabling DCB, store "0" in txq->dcb_prio since that's used for future TX Work Request "OVLAN_IDX" values. Setting non zero priority upon disabling DCB would halt the traffic. Reported-by: AMG Zollner Robert CC: David Ahern Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 090e00650601..a3e1498ca67c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -338,7 +338,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) "Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n", enable ? "set" : "unset", pi->port_id, i, -err); else - txq->dcb_prio = value; + txq->dcb_prio = enable ? value : 0; } } #endif /* CONFIG_CHELSIO_T4_DCB */ From 852f7cdbc8f78a8e740d2164aad7e6ac6e355dc3 Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Wed, 30 May 2018 11:06:34 +0200 Subject: [PATCH 377/519] brcmfmac: stop watchdog before detach and free everything [ Upstream commit 373c83a801f15b1e3d02d855fad89112bd4ccbe0 ] Using built-in in kernel image without a firmware in filesystem or in the kernel image can lead to a kernel NULL pointer deference. Watchdog need to be stopped in brcmf_sdio_remove The system is going down NOW! [ 1348.110759] Unable to handle kernel NULL pointer dereference at virtual address 000002f8 Sent SIGTERM to all processes [ 1348.121412] Mem abort info: [ 1348.126962] ESR = 0x96000004 [ 1348.130023] Exception class = DABT (current EL), IL = 32 bits [ 1348.135948] SET = 0, FnV = 0 [ 1348.138997] EA = 0, S1PTW = 0 [ 1348.142154] Data abort info: [ 1348.145045] ISV = 0, ISS = 0x00000004 [ 1348.148884] CM = 0, WnR = 0 [ 1348.151861] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 1348.158475] [00000000000002f8] pgd=0000000000000000 [ 1348.163364] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 1348.168927] Modules linked in: ipv6 [ 1348.172421] CPU: 3 PID: 1421 Comm: brcmf_wdog/mmc0 Not tainted 4.17.0-rc5-next-20180517 #18 [ 1348.180757] Hardware name: Amarula A64-Relic (DT) [ 1348.185455] pstate: 60000005 (nZCv daif -PAN -UAO) [ 1348.190251] pc : brcmf_sdiod_freezer_count+0x0/0x20 [ 1348.195124] lr : brcmf_sdio_watchdog_thread+0x64/0x290 [ 1348.200253] sp : ffff00000b85be30 [ 1348.203561] x29: ffff00000b85be30 x28: 0000000000000000 [ 1348.208868] x27: ffff00000b6cb918 x26: ffff80003b990638 [ 1348.214176] x25: ffff0000087b1a20 x24: ffff80003b94f800 [ 1348.219483] x23: ffff000008e620c8 x22: ffff000008f0b660 [ 1348.224790] x21: ffff000008c6a858 x20: 00000000fffffe00 [ 1348.230097] x19: ffff80003b94f800 x18: 0000000000000001 [ 1348.235404] x17: 0000ffffab2e8a74 x16: ffff0000080d7de8 [ 1348.240711] x15: 0000000000000000 x14: 0000000000000400 [ 1348.246018] x13: 0000000000000400 x12: 0000000000000001 [ 1348.251324] x11: 00000000000002c4 x10: 0000000000000a10 [ 1348.256631] x9 : ffff00000b85bc40 x8 : ffff80003be11870 [ 1348.261937] x7 : ffff80003dfc7308 x6 : 000000078ff08b55 [ 1348.267243] x5 : 00000139e1058400 x4 : 0000000000000000 [ 1348.272550] x3 : dead000000000100 x2 : 958f2788d6618100 [ 1348.277856] x1 : 00000000fffffe00 x0 : 0000000000000000 Signed-off-by: Michael Trimarchi Acked-by: Arend van Spriel Tested-by: Andy Shevchenko Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/brcm80211/brcmfmac/sdio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c index 7e74ac3ad815..35f62b00f1df 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c @@ -4291,6 +4291,13 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) brcmf_dbg(TRACE, "Enter\n"); if (bus) { + /* Stop watchdog task */ + if (bus->watchdog_tsk) { + send_sig(SIGTERM, bus->watchdog_tsk, 1); + kthread_stop(bus->watchdog_tsk); + bus->watchdog_tsk = NULL; + } + /* De-register interrupt handler */ brcmf_sdiod_intr_unregister(bus->sdiodev); From 27edb6791ce2ccf1345940b9dec72d4617dcf07c Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 17 Jun 2018 13:53:09 +0200 Subject: [PATCH 378/519] ARM: dts: am437x: make edt-ft5x06 a wakeup source [ Upstream commit 49a6ec5b807ea4ad7ebe1f58080ebb8497cb2d2c ] The touchscreen driver no longer configures the device as wakeup source by default. A "wakeup-source" property is needed. Signed-off-by: Daniel Mack Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 63de2a1b4315..648236c5281b 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -508,6 +508,8 @@ edt-ft5306@38 { touchscreen-size-x = <480>; touchscreen-size-y = <272>; + + wakeup-source; }; tlv320aic3106: tlv320aic3106@1b { From 1748d3f1e3fcb2d125d111cace612e1e39b665cf Mon Sep 17 00:00:00 2001 From: Ajay Gupta Date: Thu, 21 Jun 2018 16:19:45 +0300 Subject: [PATCH 379/519] usb: xhci: increase CRS timeout value [ Upstream commit 305886ca87be480ae159908c2affd135c04215cf ] Some controllers take almost 55ms to complete controller restore state (CRS). There is no timeout limit mentioned in xhci specification so fixing the issue by increasing the timeout limit to 100ms [reformat code comment -Mathias] Signed-off-by: Ajay Gupta Signed-off-by: Nagaraj Annaiah Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2d837b6bd495..128a3c0a9286 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1052,8 +1052,13 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) command = readl(&xhci->op_regs->command); command |= CMD_CRS; writel(command, &xhci->op_regs->command); + /* + * Some controllers take up to 55+ ms to complete the controller + * restore so setting the timeout to 100ms. Xhci specification + * doesn't mention any timeout value. + */ if (xhci_handshake(&xhci->op_regs->status, - STS_RESTORE, 0, 10 * 1000)) { + STS_RESTORE, 0, 100 * 1000)) { xhci_warn(xhci, "WARN: xHC restore state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; From 6ba7ddb12aed6bf43e9749842785dc8cd4f4e39b Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 11 Jun 2018 09:31:53 +0200 Subject: [PATCH 380/519] perf test session topology: Fix test on s390 [ Upstream commit b930e62ecd362843002bdf84c2940439822af321 ] On s390 this test case fails because the socket identifiction numbers assigned to the CPU are higher than the CPU identification numbers. F/ix this by adding the platform architecture into the perf data header flag information. This helps identifiing the test platform and handles s390 specifics in process_cpu_topology(). Before: [root@p23lp27 perf]# perf test -vvvvv -F 39 39: Session topology : --- start --- templ file: /tmp/perf-test-iUv755 socket_id number is too big.You may need to upgrade the perf tool. ---- end ---- Session topology: Skip [root@p23lp27 perf]# After: [root@p23lp27 perf]# perf test -vvvvv -F 39 39: Session topology : --- start --- templ file: /tmp/perf-test-8X8VTs CPU 0, core 0, socket 6 CPU 1, core 1, socket 3 ---- end ---- Session topology: Ok [root@p23lp27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Fixes: c84974ed9fb6 ("perf test: Add entry to test cpu topology") Link: http://lkml.kernel.org/r/20180611073153.15592-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index f5bb096c3bd9..bf67343c7795 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -42,6 +42,7 @@ static int session_write_header(char *path) perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); + perf_header__set_feat(&session->header, HEADER_ARCH); session->header.data_size += DATA_SIZE; From 7cef243ef30b20c9ba802af138e0fa1c3e5d0ebc Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 11 Jun 2018 16:10:49 +0530 Subject: [PATCH 381/519] perf report powerpc: Fix crash if callchain is empty [ Upstream commit 143c99f6ac6812d23254e80844d6e34be897d3e1 ] For some cases, the callchain provided by the kernel may be empty. So, the callchain ip filtering code will cause a crash if we do not check whether the struct ip_callchain pointer is NULL before accessing any members. This can be observed on a powerpc64le system running Fedora 27 as shown below. # perf record -b -e cycles:u ls Before: # perf report --branch-history perf: Segmentation fault -------- backtrace -------- perf[0x1027615c] linux-vdso64.so.1(__kernel_sigtramp_rt64+0x0)[0x7fff856304d8] perf(arch_skip_callchain_idx+0x44)[0x10257c58] perf[0x1017f2e4] perf(thread__resolve_callchain+0x124)[0x1017ff5c] perf(sample__resolve_callchain+0xf0)[0x10172788] ... After: # perf report --branch-history Samples: 25 of event 'cycles:u', Event count (approx.): 2306870 Overhead Source:Line Symbol Shared Object + 11.60% _init+35736 [.] _init ls + 9.84% strcoll_l.c:137 [.] __strcoll_l libc-2.26.so + 9.16% memcpy.S:175 [.] __memcpy_power7 libc-2.26.so + 9.01% gconv_charset.h:54 [.] _nl_find_locale libc-2.26.so + 8.87% dl-addr.c:52 [.] _dl_addr libc-2.26.so + 8.83% _init+236 [.] _init ls ... Reported-by: Ravi Bangoria Signed-off-by: Sandipan Das Acked-by: Ravi Bangoria Cc: Jiri Olsa Cc: Naveen N. Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20180611104049.11048-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 0c370f81e002..bd630c222e65 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -243,7 +243,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) u64 ip; u64 skip_slot = -1; - if (chain->nr < 3) + if (!chain || chain->nr < 3) return skip_slot; ip = chain->ips[2]; From 9a406f22b050bd41e803ede3ced9750d6eeaf61f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:17:17 -0700 Subject: [PATCH 382/519] selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs [ Upstream commit ec348020566009d3da9b99f07c05814d13969c78 ] When I wrote the sigreturn test, I didn't realize that AMD's busted IRET behavior was different from Intel's busted IRET behavior: On AMD CPUs, the CPU leaks the high 32 bits of the kernel stack pointer to certain userspace contexts. Gee, thanks. There's very little the kernel can do about it. Modify the test so it passes. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/86e7fd3564497f657de30a36da4505799eebef01.1530076529.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/sigreturn.c | 46 ++++++++++++++++--------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index b5aa1bab7416..97ad2d40324a 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -456,19 +456,38 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) greg_t req = requested_regs[i], res = resulting_regs[i]; if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ - if (i == REG_SP) { - printf("\tSP: %llx -> %llx\n", (unsigned long long)req, - (unsigned long long)res); + if (i == REG_SP) { /* - * In many circumstances, the high 32 bits of rsp - * are zeroed. For example, we could be a real - * 32-bit program, or we could hit any of a number - * of poorly-documented IRET or segmented ESP - * oddities. If this happens, it's okay. + * If we were using a 16-bit stack segment, then + * the kernel is a bit stuck: IRET only restores + * the low 16 bits of ESP/RSP if SS is 16-bit. + * The kernel uses a hack to restore bits 31:16, + * but that hack doesn't help with bits 63:32. + * On Intel CPUs, bits 63:32 end up zeroed, and, on + * AMD CPUs, they leak the high bits of the kernel + * espfix64 stack pointer. There's very little that + * the kernel can do about it. + * + * Similarly, if we are returning to a 32-bit context, + * the CPU will often lose the high 32 bits of RSP. */ - if (res == (req & 0xFFFFFFFF)) - continue; /* OK; not expected to work */ + + if (res == req) + continue; + + if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { + printf("[NOTE]\tSP: %llx -> %llx\n", + (unsigned long long)req, + (unsigned long long)res); + continue; + } + + printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; } bool ignore_reg = false; @@ -507,13 +526,6 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) } if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { - /* - * SP is particularly interesting here. The - * usual cause of failures is that we hit the - * nasty IRET case of returning to a 16-bit SS, - * in which case bits 16:31 of the *kernel* - * stack pointer persist in ESP. - */ printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", i, (unsigned long long)requested_regs[i], (unsigned long long)resulting_regs[i]); From d238b2e06f390de4a3a15058a9e0c538fb6e35e8 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 5 Jun 2018 15:37:51 +0530 Subject: [PATCH 383/519] ARM: dts: da850: Fix interrups property for gpio [ Upstream commit 3eb1b955cd7ed1e621ace856710006c2a8a7f231 ] The intc #interrupt-cells is equal to 1. Currently gpio node has 2 cells per IRQ which is wrong. Remove the additional cell for each of the interrupts. Signed-off-by: Keerthy Fixes: 2e38b946dc54 ("ARM: davinci: da850: add GPIO DT node") Signed-off-by: Sekhar Nori Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/da850.dtsi | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index 0bd98cd00816..4ef5c3410fcc 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -267,11 +267,7 @@ gpio: gpio@1e26000 { compatible = "ti,dm6441-gpio"; gpio-controller; reg = <0x226000 0x1000>; - interrupts = <42 IRQ_TYPE_EDGE_BOTH - 43 IRQ_TYPE_EDGE_BOTH 44 IRQ_TYPE_EDGE_BOTH - 45 IRQ_TYPE_EDGE_BOTH 46 IRQ_TYPE_EDGE_BOTH - 47 IRQ_TYPE_EDGE_BOTH 48 IRQ_TYPE_EDGE_BOTH - 49 IRQ_TYPE_EDGE_BOTH 50 IRQ_TYPE_EDGE_BOTH>; + interrupts = <42 43 44 45 46 47 48 49 50>; ti,ngpio = <144>; ti,davinci-gpio-unbanked = <0>; status = "disabled"; From 0430caf5ccc880dfacba544accbddc839d70ded1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Jun 2018 14:15:47 +0300 Subject: [PATCH 384/519] dmaengine: k3dma: Off by one in k3_of_dma_simple_xlate() [ Upstream commit c4c2b7644cc9a41f17a8cc8904efe3f66ae4c7ed ] The d->chans[] array has d->dma_requests elements so the > should be >= here. Fixes: 8e6152bc660e ("dmaengine: Add hisilicon k3 DMA engine driver") Signed-off-by: Dan Carpenter Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/k3dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 1ba2fd73852d..0f0c06ab414b 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -660,7 +660,7 @@ static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct k3_dma_dev *d = ofdma->of_dma_data; unsigned int request = dma_spec->args[0]; - if (request > d->dma_requests) + if (request >= d->dma_requests) return NULL; return dma_get_slave_channel(&(d->chans[request].vc.chan)); From 66de11067753fbc562b7d2dba550d563c02449ec Mon Sep 17 00:00:00 2001 From: BingJing Chang Date: Thu, 28 Jun 2018 18:40:11 +0800 Subject: [PATCH 385/519] md/raid10: fix that replacement cannot complete recovery after reassemble [ Upstream commit bda3153998f3eb2cafa4a6311971143628eacdbc ] During assemble, the spare marked for replacement is not checked. conf->fullsync cannot be updated to be 1. As a result, recovery will treat it as a clean array. All recovering sectors are skipped. Original device is replaced with the not-recovered spare. mdadm -C /dev/md0 -l10 -n4 -pn2 /dev/loop[0123] mdadm /dev/md0 -a /dev/loop4 mdadm /dev/md0 --replace /dev/loop0 mdadm -S /dev/md0 # stop array during recovery mdadm -A /dev/md0 /dev/loop[01234] After reassemble, you can see recovery go on, but it completes immediately. In fact, recovery is not actually processed. To solve this problem, we just add the missing logics for replacment spares. (In raid1.c or raid5.c, they have already been checked.) Reported-by: Alex Chen Reviewed-by: Alex Wu Reviewed-by: Chung-Chiang Cheng Signed-off-by: BingJing Chang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7b6acedc89c1..8a731bdd268e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3691,6 +3691,13 @@ static int run(struct mddev *mddev) disk->rdev->saved_raid_disk < 0) conf->fullsync = 1; } + + if (disk->replacement && + !test_bit(In_sync, &disk->replacement->flags) && + disk->replacement->saved_raid_disk < 0) { + conf->fullsync = 1; + } + disk->recovery_disabled = mddev->recovery_disabled - 1; } From 048115cfba050e34667d7e3d05ce5f09fd54524f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:13 +0200 Subject: [PATCH 386/519] drm/exynos: gsc: Fix support for NV16/61, YUV420/YVU420 and YUV422 modes [ Upstream commit dd209ef809080ced903e7747ee3ef640c923a1d2 ] Fix following issues related to planar YUV pixel format configuration: - NV16/61 modes were incorrectly programmed as NV12/21, - YVU420 was programmed as YUV420 on source, - YVU420 and YUV422 were programmed as YUV420 on output. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 +++++++++++++++++-------- drivers/gpu/drm/exynos/regs-gsc.h | 1 + 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 11b87d2a7913..ba69d1c72221 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -526,21 +526,25 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) GSC_IN_CHROMA_ORDER_CRCB); break; case DRM_FORMAT_NV21: + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_2P); + break; case DRM_FORMAT_NV61: - cfg |= (GSC_IN_CHROMA_ORDER_CRCB | - GSC_IN_YUV420_2P); + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV422_2P); break; case DRM_FORMAT_YUV422: cfg |= GSC_IN_YUV422_3P; break; case DRM_FORMAT_YUV420: + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_3P); + break; case DRM_FORMAT_YVU420: - cfg |= GSC_IN_YUV420_3P; + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_3P); break; case DRM_FORMAT_NV12: + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); + break; case DRM_FORMAT_NV16: - cfg |= (GSC_IN_CHROMA_ORDER_CBCR | - GSC_IN_YUV420_2P); + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV422_2P); break; default: dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); @@ -800,18 +804,25 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) GSC_OUT_CHROMA_ORDER_CRCB); break; case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_2P); break; + case DRM_FORMAT_NV61: + cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV422_2P); + break; case DRM_FORMAT_YUV422: + cfg |= GSC_OUT_YUV422_3P; + break; case DRM_FORMAT_YUV420: + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_3P); + break; case DRM_FORMAT_YVU420: - cfg |= GSC_OUT_YUV420_3P; + cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_3P); break; case DRM_FORMAT_NV12: + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); + break; case DRM_FORMAT_NV16: - cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | - GSC_OUT_YUV420_2P); + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV422_2P); break; default: dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h index 9ad592707aaf..ade10966d6af 100644 --- a/drivers/gpu/drm/exynos/regs-gsc.h +++ b/drivers/gpu/drm/exynos/regs-gsc.h @@ -138,6 +138,7 @@ #define GSC_OUT_YUV420_3P (3 << 4) #define GSC_OUT_YUV422_1P (4 << 4) #define GSC_OUT_YUV422_2P (5 << 4) +#define GSC_OUT_YUV422_3P (6 << 4) #define GSC_OUT_YUV444 (7 << 4) #define GSC_OUT_TILE_TYPE_MASK (1 << 2) #define GSC_OUT_TILE_C_16x8 (0 << 2) From c46030269f686fc3ca92d93140c9f2957aefee3b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:07:40 +0200 Subject: [PATCH 387/519] drm/exynos: decon5433: Fix per-plane global alpha for XRGB modes [ Upstream commit ab337fc274a1957ff0771f19e826c736253f7c39 ] Set per-plane global alpha to maximum value to get proper blending of XRGB and ARGB planes. This fixes the strange order of overlapping planes. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 34cebcdc2fc4..784ef54f5a1b 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -278,8 +278,8 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, COORDINATE_Y(plane->crtc_y + plane->crtc_h - 1); writel(val, ctx->addr + DECON_VIDOSDxB(win)); - val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | - VIDOSD_Wx_ALPHA_B_F(0x0); + val = VIDOSD_Wx_ALPHA_R_F(0xff) | VIDOSD_Wx_ALPHA_G_F(0xff) | + VIDOSD_Wx_ALPHA_B_F(0xff); writel(val, ctx->addr + DECON_VIDOSDxC(win)); val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | From acc83070ba75b3ab93bf46f711246e9b97ed46c0 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:07:49 +0200 Subject: [PATCH 388/519] drm/exynos: decon5433: Fix WINCONx reset value [ Upstream commit 7b7aa62c05eac9789c208b946f515983a9255d8d ] The only bits that should be preserved in decon_win_set_fmt() is WINCONx_ENWIN_F. All other bits depends on the selected pixel formats and are set by the mentioned function. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 784ef54f5a1b..9cae5f69b07c 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -190,7 +190,7 @@ static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, unsigned long val; val = readl(ctx->addr + DECON_WINCONx(win)); - val &= ~WINCONx_BPPMODE_MASK; + val &= WINCONx_ENWIN_F; switch (fb->pixel_format) { case DRM_FORMAT_XRGB1555: From aba71e6a936a62126d0c084d4add455db697ee24 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 28 Jun 2018 04:52:15 -0700 Subject: [PATCH 389/519] bnx2x: Fix receiving tx-timeout in error or recovery state. [ Upstream commit 484c016d9392786ce5c74017c206c706f29f823d ] Driver performs the internal reload when it receives tx-timeout event from the OS. Internal reload might fail in some scenarios e.g., fatal HW issues. In such cases OS still see the link, which would result in undesirable functionalities such as re-generation of tx-timeouts. The patch addresses this issue by indicating the link-down to OS when tx-timeout is detected, and keeping the link in down state till the internal reload is successful. Please consider applying it to 'net' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 6 ++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index b5e64b02200c..1ea068815419 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1634,6 +1634,7 @@ struct bnx2x { struct link_vars link_vars; u32 link_cnt; struct bnx2x_link_report_data last_reported_link; + bool force_link_down; struct mdio_if_info mdio; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 949a82458a29..ebc4518d598a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1277,6 +1277,11 @@ void __bnx2x_link_report(struct bnx2x *bp) { struct bnx2x_link_report_data cur_data; + if (bp->force_link_down) { + bp->link_vars.link_up = 0; + return; + } + /* reread mf_cfg */ if (IS_PF(bp) && !CHIP_IS_E1(bp)) bnx2x_read_mf_cfg(bp); @@ -2840,6 +2845,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bp->pending_max = 0; } + bp->force_link_down = false; if (bp->port.pmf) { rc = bnx2x_initial_phy_init(bp, load_mode); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 8ddb68a3fdb6..403fa8d98aa3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10222,6 +10222,12 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) bp->sp_rtnl_state = 0; smp_mb(); + /* Immediately indicate link as down */ + bp->link_vars.link_up = 0; + bp->force_link_down = true; + netif_carrier_off(bp->dev); + BNX2X_ERR("Indicating link is down due to Tx-timeout\n"); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); bnx2x_nic_load(bp, LOAD_NORMAL); From 8f2f46791e28b7058a32fb7eab32e498ff838627 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 18 Jun 2018 15:34:14 +1000 Subject: [PATCH 390/519] m68k: fix "bad page state" oops on ColdFire boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ecd60532e060e45c63c57ecf1c8549b1d656d34d ] Booting a ColdFire m68k core with MMU enabled causes a "bad page state" oops since commit 1d40a5ea01d5 ("mm: mark pages in use for page tables"): BUG: Bad page state in process sh pfn:01ce2 page:004fefc8 count:0 mapcount:-1024 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000 raw: 039c4000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 22 Comm: sh Not tainted 4.17.0-07461-g1d40a5ea01d5 #13 Fix by calling pgtable_page_dtor() in our __pte_free_tlb() code path, so that the PG_table flag is cleared before we free the pte page. Note that I had to change the type of pte_free() to be static from extern. Otherwise you get a lot of warnings like this: ./arch/m68k/include/asm/mcf_pgalloc.h:80:2: warning: ‘pgtable_page_dtor’ is static but used in inline function ‘pte_free’ which is not static pgtable_page_dtor(page); ^ And making it static is consistent with our use of this in the other m68k pgalloc definitions of pte_free(). Signed-off-by: Greg Ungerer CC: Matthew Wilcox Reviewed-by: Geert Uytterhoeven Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/mcf_pgalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index f9924fbcfe42..456e3f75ef3b 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -43,6 +43,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, unsigned long address) { + pgtable_page_dtor(page); __free_page(page); } @@ -73,8 +74,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return page; } -extern inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { + pgtable_page_dtor(page); __free_page(page); } From 1bdab67ddfa7b4e9e7a90637a22f9abc6ca88cf4 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 26 Jun 2018 09:58:02 -0700 Subject: [PATCH 391/519] HID: wacom: Correct touch maximum XY of 2nd-gen Intuos [ Upstream commit 3b8d573586d1b9dee33edf6cb6f2ca05f4bca568 ] The touch sensors on the 2nd-gen Intuos tablets don't use a 4096x4096 sensor like other similar tablets (3rd-gen Bamboo, Intuos5, etc.). The incorrect maximum XY values don't normally affect userspace since touch input from these devices is typically relative rather than absolute. It does, however, cause problems when absolute distances need to be measured, e.g. for gesture recognition. Since the resolution of the touch sensor on these devices is 10 units / mm (versus 100 for the pen sensor), the proper maximum values can be calculated by simply dividing by 10. Fixes: b5fd2a3e92 ("Input: wacom - add support for three new Intuos devices") Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index b62c50d1b1e4..b184956bd430 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2487,8 +2487,14 @@ void wacom_setup_device_quirks(struct wacom *wacom) if (features->type >= INTUOSHT && features->type <= BAMBOO_PT) features->device_type |= WACOM_DEVICETYPE_PAD; - features->x_max = 4096; - features->y_max = 4096; + if (features->type == INTUOSHT2) { + features->x_max = features->x_max / 10; + features->y_max = features->y_max / 10; + } + else { + features->x_max = 4096; + features->y_max = 4096; + } } else if (features->pktlen == WACOM_PKGLEN_BBTOUCH) { features->device_type |= WACOM_DEVICETYPE_PAD; From 0d0af17ae83d6feb29d676c72423461419df5110 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 25 Jun 2018 09:34:03 -0300 Subject: [PATCH 392/519] ARM: imx_v6_v7_defconfig: Select ULPI support [ Upstream commit 157bcc06094c3c5800d3f4676527047b79b618e7 ] Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like imx51-babbge. This fixes a kernel hang in 4.18-rc1 on i.mx51-babbage, caused by commit 03e6275ae381 ("usb: chipidea: Fix ULPI on imx51"). Suggested-by: Andrey Smirnov Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v6_v7_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 4187f69f6630..b3490c1c49d1 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -261,6 +261,7 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_FTDI_SIO=m @@ -287,6 +288,7 @@ CONFIG_USB_G_NCM=m CONFIG_USB_GADGETFS=m CONFIG_USB_MASS_STORAGE=m CONFIG_USB_G_SERIAL=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y From be38b9556d9ba051adae074367acb3ee362180b2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 26 Jun 2018 08:37:09 -0300 Subject: [PATCH 393/519] ARM: imx_v4_v5_defconfig: Select ULPI support [ Upstream commit 2ceb2780b790b74bc408a949f6aedbad8afa693e ] Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like that use ULPI interface. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v4_v5_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index d3a8018639de..f4a2d28936e1 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -145,9 +145,11 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_NOP_USB_XCEIV=y CONFIG_USB_GADGET=y CONFIG_USB_ETH=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y From bfb1c3470bcb05537fca601a0101d759d054b822 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 8 Mar 2018 21:58:43 +0100 Subject: [PATCH 394/519] tracing: Use __printf markup to silence compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 26b68dd2f48fe7699a89f0cfbb9f4a650dc1c837 ] Silence warnings (triggered at W=1) by adding relevant __printf attributes. CC kernel/trace/trace.o kernel/trace/trace.c: In function ‘__trace_array_vprintk’: kernel/trace/trace.c:2979:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); ^~~ AR kernel/trace/built-in.o Link: http://lkml.kernel.org/r/20180308205843.27447-1-malat@debian.org Signed-off-by: Mathieu Malaterre Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1b980a8ef791..11761b3dd7ba 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2186,6 +2186,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vbprintk); +__printf(3, 0) static int __trace_array_vprintk(struct ring_buffer *buffer, unsigned long ip, const char *fmt, va_list args) @@ -2236,12 +2237,14 @@ __trace_array_vprintk(struct ring_buffer *buffer, return len; } +__printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); } +__printf(3, 0) int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { @@ -2257,6 +2260,7 @@ int trace_array_printk(struct trace_array *tr, return ret; } +__printf(3, 4) int trace_array_printk_buf(struct ring_buffer *buffer, unsigned long ip, const char *fmt, ...) { @@ -2272,6 +2276,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer, return ret; } +__printf(2, 0) int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(&global_trace, ip, fmt, args); From 1acb2ad5d9d0fc66f18c74e22af3c07e41a5dbca Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 3 Jul 2018 17:02:46 -0700 Subject: [PATCH 395/519] kasan: fix shadow_size calculation error in kasan_module_alloc [ Upstream commit 1e8e18f694a52d703665012ca486826f64bac29d ] There is a special case that the size is "(N << KASAN_SHADOW_SCALE_SHIFT) Pages plus X", the value of X is [1, KASAN_SHADOW_SCALE_SIZE-1]. The operation "size >> KASAN_SHADOW_SCALE_SHIFT" will drop X, and the roundup operation can not retrieve the missed one page. For example: size=0x28006, PAGE_SIZE=0x1000, KASAN_SHADOW_SCALE_SHIFT=3, we will get shadow_size=0x5000, but actually we need 6 pages. shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE); This can lead to a kernel crash when kasan is enabled and the value of mod->core_layout.size or mod->init_layout.size is like above. Because the shadow memory of X has not been allocated and mapped. move_module: ptr = module_alloc(mod->core_layout.size); ... memset(ptr, 0, mod->core_layout.size); //crashed Unable to handle kernel paging request at virtual address ffff0fffff97b000 ...... Call trace: __asan_storeN+0x174/0x1a8 memset+0x24/0x48 layout_and_allocate+0xcd8/0x1800 load_module+0x190/0x23e8 SyS_finit_module+0x148/0x180 Link: http://lkml.kernel.org/r/1529659626-12660-1-git-send-email-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Reviewed-by: Dmitriy Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Hanjun Guo Cc: Libin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/kasan/kasan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index ba9adce1422a..b7397b459960 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -427,12 +427,13 @@ void kasan_kfree_large(const void *ptr) int kasan_module_alloc(void *addr, size_t size) { void *ret; + size_t scaled_size; size_t shadow_size; unsigned long shadow_start; shadow_start = (unsigned long)kasan_mem_to_shadow(addr); - shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, - PAGE_SIZE); + scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT; + shadow_size = round_up(scaled_size, PAGE_SIZE); if (WARN_ON(!PAGE_ALIGNED(shadow_start))) return -EINVAL; From c7fda06308d6d1ed5d094a5f22b3e1e33852edbf Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Tue, 3 Jul 2018 11:21:46 -0400 Subject: [PATCH 396/519] smsc75xx: Add workaround for gigabit link up hardware errata. [ Upstream commit d461e3da905332189aad546b2ad9adbe6071c7cc ] In certain conditions, the device may not be able to link in gigabit mode. This software workaround ensures that the device will not enter the failure state. Fixes: d0cad871703b898a442e4049c532ec39168e5b57 ("SMSC75XX USB 2.0 Gigabit Ethernet Devices") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc75xx.c | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 7337e6c0e126..478937418a33 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -81,6 +81,9 @@ static bool turbo_mode = true; module_param(turbo_mode, bool, 0644); MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction"); +static int smsc75xx_link_ok_nopm(struct usbnet *dev); +static int smsc75xx_phy_gig_workaround(struct usbnet *dev); + static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, u32 *data, int in_pm) { @@ -840,6 +843,9 @@ static int smsc75xx_phy_initialize(struct usbnet *dev) return -EIO; } + /* phy workaround for gig link */ + smsc75xx_phy_gig_workaround(dev); + smsc75xx_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); @@ -978,6 +984,62 @@ static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) return -EIO; } +static int smsc75xx_phy_gig_workaround(struct usbnet *dev) +{ + struct mii_if_info *mii = &dev->mii; + int ret = 0, timeout = 0; + u32 buf, link_up = 0; + + /* Set the phy in Gig loopback */ + smsc75xx_mdio_write(dev->net, mii->phy_id, MII_BMCR, 0x4040); + + /* Wait for the link up */ + do { + link_up = smsc75xx_link_ok_nopm(dev); + usleep_range(10000, 20000); + timeout++; + } while ((!link_up) && (timeout < 1000)); + + if (timeout >= 1000) { + netdev_warn(dev->net, "Timeout waiting for PHY link up\n"); + return -EIO; + } + + /* phy reset */ + ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) { + netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); + return ret; + } + + buf |= PMT_CTL_PHY_RST; + + ret = smsc75xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) { + netdev_warn(dev->net, "Failed to write PMT_CTL: %d\n", ret); + return ret; + } + + timeout = 0; + do { + usleep_range(10000, 20000); + ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) { + netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", + ret); + return ret; + } + timeout++; + } while ((buf & PMT_CTL_PHY_RST) && (timeout < 100)); + + if (timeout >= 100) { + netdev_warn(dev->net, "timeout waiting for PHY Reset\n"); + return -EIO; + } + + return 0; +} + static int smsc75xx_reset(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); From 7e8f97b07a3be3493072f1cabe888f2d770b8077 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Jul 2018 20:25:32 +0200 Subject: [PATCH 397/519] netfilter: x_tables: set module owner for icmp(6) matches [ Upstream commit d376bef9c29b3c65aeee4e785fffcd97ef0a9a81 ] nft_compat relies on xt_request_find_match to increment refcount of the module that provides the match/target. The (builtin) icmp matches did't set the module owner so it was possible to rmmod ip(6)tables while icmp extensions were still in use. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/ip_tables.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9363c1a70f16..8adb6e9ba8f5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2072,6 +2072,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = { .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, .family = NFPROTO_IPV4, + .me = THIS_MODULE, }, }; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6cb9e35d23ac..96de322fe5e2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2073,6 +2073,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = { .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, .family = NFPROTO_IPV6, + .me = THIS_MODULE, }, }; From be4691a7c58b40ddcdad5f82fb652475afc3440e Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 6 Jul 2018 22:15:00 +0200 Subject: [PATCH 398/519] ARM: pxa: irq: fix handling of ICMR registers in suspend/resume [ Upstream commit 0c1049dcb4ceec640d8bd797335bcbebdcab44d2 ] PXA3xx platforms have 56 interrupts that are stored in two ICMR registers. The code in pxa_irq_suspend() and pxa_irq_resume() however does a simple division by 32 which only leads to one register being saved at suspend and restored at resume time. The NAND interrupt setting, for instance, is lost. Fix this by using DIV_ROUND_UP() instead. Signed-off-by: Daniel Mack Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 9c10248fadcc..4e8c2116808e 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -185,7 +185,7 @@ static int pxa_irq_suspend(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); saved_icmr[i] = __raw_readl(base + ICMR); @@ -204,7 +204,7 @@ static void pxa_irq_resume(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); __raw_writel(saved_icmr[i], base + ICMR); From 691a13ac70e31e3004310bf56360ee69c62514cb Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:53 +0200 Subject: [PATCH 399/519] ieee802154: at86rf230: switch from BUG_ON() to WARN_ON() on problem [ Upstream commit 20f330452ad8814f2289a589baf65e21270879a7 ] The check is valid but it does not warrant to crash the kernel. A WARN_ON() is good enough here. Found by checkpatch. Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/at86rf230.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 0fbbba7a0cae..184185da2f8b 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -932,7 +932,7 @@ at86rf230_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) static int at86rf230_ed(struct ieee802154_hw *hw, u8 *level) { - BUG_ON(!level); + WARN_ON(!level); *level = 0xbe; return 0; } From 24e3a53c0d2c6be3385c5676056124b44f7c06c2 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:13:54 +0200 Subject: [PATCH 400/519] ieee802154: at86rf230: use __func__ macro for debug messages [ Upstream commit 8a81388ec27c4c0adbdecd20e67bb5f411ab46b2 ] Instead of having the function name hard-coded (it might change and we forgot to update them in the debug output) we can use __func__ instead and also shorter the line so we do not need to break it. Also fix an extra blank line while being here. Found by checkpatch. Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/at86rf230.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 184185da2f8b..f72c2967ae82 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1108,8 +1108,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_SADDR_CHANGED) { u16 addr = le16_to_cpu(filt->short_addr); - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for saddr\n"); + dev_vdbg(&lp->spi->dev, "%s called for saddr\n", __func__); __at86rf230_write(lp, RG_SHORT_ADDR_0, addr); __at86rf230_write(lp, RG_SHORT_ADDR_1, addr >> 8); } @@ -1117,8 +1116,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, if (changed & IEEE802154_AFILT_PANID_CHANGED) { u16 pan = le16_to_cpu(filt->pan_id); - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for pan id\n"); + dev_vdbg(&lp->spi->dev, "%s called for pan id\n", __func__); __at86rf230_write(lp, RG_PAN_ID_0, pan); __at86rf230_write(lp, RG_PAN_ID_1, pan >> 8); } @@ -1127,15 +1125,13 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, u8 i, addr[8]; memcpy(addr, &filt->ieee_addr, 8); - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for IEEE addr\n"); + dev_vdbg(&lp->spi->dev, "%s called for IEEE addr\n", __func__); for (i = 0; i < 8; i++) __at86rf230_write(lp, RG_IEEE_ADDR_0 + i, addr[i]); } if (changed & IEEE802154_AFILT_PANC_CHANGED) { - dev_vdbg(&lp->spi->dev, - "at86rf230_set_hw_addr_filt called for panc change\n"); + dev_vdbg(&lp->spi->dev, "%s called for panc change\n", __func__); if (filt->pan_coord) at86rf230_write_subreg(lp, SR_AACK_I_AM_COORD, 1); else @@ -1239,7 +1235,6 @@ at86rf230_set_cca_mode(struct ieee802154_hw *hw, return at86rf230_write_subreg(lp, SR_CCA_MODE, val); } - static int at86rf230_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) { From fe9ee61f5a1b9413ad3862bfa5a63c633d84f38a Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:14:05 +0200 Subject: [PATCH 401/519] ieee802154: fakelb: switch from BUG_ON() to WARN_ON() on problem [ Upstream commit 8f2fbc6c60ff213369e06a73610fc882a42fdf20 ] The check is valid but it does not warrant to crash the kernel. A WARN_ON() is good enough here. Found by checkpatch. Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/fakelb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 43617ded3773..91de25c53274 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -49,7 +49,7 @@ struct fakelb_phy { static int fakelb_hw_ed(struct ieee802154_hw *hw, u8 *level) { - BUG_ON(!level); + WARN_ON(!level); *level = 0xbe; return 0; From 2cb585f9c5d6b70bfcd12beb314d9ba060c3208a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 24 Jun 2018 14:35:10 +0100 Subject: [PATCH 402/519] drm/armada: fix colorkey mode property [ Upstream commit d378859a667edc99e3473704847698cae97ca2b1 ] The colorkey mode property was not correctly disabling the colorkeying when "disabled" mode was selected. Arrange for this to work as one would expect. Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/armada/armada_hw.h | 1 + drivers/gpu/drm/armada/armada_overlay.c | 30 ++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_hw.h b/drivers/gpu/drm/armada/armada_hw.h index 27319a8335e2..345dc4d0851e 100644 --- a/drivers/gpu/drm/armada/armada_hw.h +++ b/drivers/gpu/drm/armada/armada_hw.h @@ -160,6 +160,7 @@ enum { CFG_ALPHAM_GRA = 0x1 << 16, CFG_ALPHAM_CFG = 0x2 << 16, CFG_ALPHA_MASK = 0xff << 8, +#define CFG_ALPHA(x) ((x) << 8) CFG_PIXCMD_MASK = 0xff, }; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index 5c22b380f8f3..f8a69ec63550 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -27,6 +27,7 @@ struct armada_ovl_plane_properties { uint16_t contrast; uint16_t saturation; uint32_t colorkey_mode; + uint32_t colorkey_enable; }; struct armada_ovl_plane { @@ -62,11 +63,13 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop, writel_relaxed(0x00002000, dcrtc->base + LCD_SPU_CBSH_HUE); spin_lock_irq(&dcrtc->irq_lock); - armada_updatel(prop->colorkey_mode | CFG_ALPHAM_GRA, - CFG_CKMODE_MASK | CFG_ALPHAM_MASK | CFG_ALPHA_MASK, - dcrtc->base + LCD_SPU_DMA_CTRL1); - - armada_updatel(ADV_GRACOLORKEY, 0, dcrtc->base + LCD_SPU_ADV_REG); + armada_updatel(prop->colorkey_mode, + CFG_CKMODE_MASK | CFG_ALPHAM_MASK | CFG_ALPHA_MASK, + dcrtc->base + LCD_SPU_DMA_CTRL1); + if (dcrtc->variant->has_spu_adv_reg) + armada_updatel(prop->colorkey_enable, + ADV_GRACOLORKEY | ADV_VIDCOLORKEY, + dcrtc->base + LCD_SPU_ADV_REG); spin_unlock_irq(&dcrtc->irq_lock); } @@ -339,8 +342,17 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane, dplane->prop.colorkey_vb |= K2B(val); update_attr = true; } else if (property == priv->colorkey_mode_prop) { - dplane->prop.colorkey_mode &= ~CFG_CKMODE_MASK; - dplane->prop.colorkey_mode |= CFG_CKMODE(val); + if (val == CKMODE_DISABLE) { + dplane->prop.colorkey_mode = + CFG_CKMODE(CKMODE_DISABLE) | + CFG_ALPHAM_CFG | CFG_ALPHA(255); + dplane->prop.colorkey_enable = 0; + } else { + dplane->prop.colorkey_mode = + CFG_CKMODE(val) | + CFG_ALPHAM_GRA | CFG_ALPHA(0); + dplane->prop.colorkey_enable = ADV_GRACOLORKEY; + } update_attr = true; } else if (property == priv->brightness_prop) { dplane->prop.brightness = val - 256; @@ -469,7 +481,9 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) dplane->prop.colorkey_yr = 0xfefefe00; dplane->prop.colorkey_ug = 0x01010100; dplane->prop.colorkey_vb = 0x01010100; - dplane->prop.colorkey_mode = CFG_CKMODE(CKMODE_RGB); + dplane->prop.colorkey_mode = CFG_CKMODE(CKMODE_RGB) | + CFG_ALPHAM_GRA | CFG_ALPHA(0); + dplane->prop.colorkey_enable = ADV_GRACOLORKEY; dplane->prop.brightness = 0; dplane->prop.contrast = 0x4000; dplane->prop.saturation = 0x4000; From 149751b516c07eb15f9378bbed175d23589b6215 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 9 Jul 2018 02:24:52 -0400 Subject: [PATCH 403/519] bnxt_en: Fix for system hang if request_irq fails [ Upstream commit c58387ab1614f6d7fb9e244f214b61e7631421fc ] Fix bug in the error code path when bnxt_request_irq() returns failure. bnxt_disable_napi() should not be called in this error path because NAPI has not been enabled yet. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9904d768a20a..4ffacafddacb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4591,7 +4591,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) rc = bnxt_request_irq(bp); if (rc) { netdev_err(bp->dev, "bnxt_request_irq err: %x\n", rc); - goto open_err; + goto open_err_irq; } } @@ -4629,6 +4629,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) open_err: bnxt_disable_napi(bp); + +open_err_irq: bnxt_del_napi(bp); open_err_free_mem: From c0cd6f4de95a8fee74131bac79c444f8120c93e9 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 29 Jun 2018 12:46:52 -0500 Subject: [PATCH 404/519] perf llvm-utils: Remove bashism from kernel include fetch script [ Upstream commit f6432b9f65001651412dbc3589d251534822d4ab ] Like system(), popen() calls /bin/sh, which may/may not be bash. Script when run on dash and encounters the line, yields: exit: Illegal number: -1 checkbashisms report on script content: possible bashism (exit|return with negative status code): exit -1 Remove the bashism and use the more portable non-zero failure status code 1. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180629124652.8d0af7e2281fd3fd8262cacc@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/llvm-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 00724d496d38..62f6d7dc2dda 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -254,16 +254,16 @@ static const char *kinc_fetch_script = "#!/usr/bin/env sh\n" "if ! test -d \"$KBUILD_DIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "TMPDIR=`mktemp -d`\n" "if test -z \"$TMPDIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "cat << EOF > $TMPDIR/Makefile\n" "obj-y := dummy.o\n" From 97d53c81980eaba74690868efd3160fb635b8d42 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 10 Jul 2018 08:22:40 +0100 Subject: [PATCH 405/519] ARM: 8780/1: ftrace: Only set kernel memory back to read-only after boot [ Upstream commit b4c7e2bd2eb4764afe3af9409ff3b1b87116fa30 ] Dynamic ftrace requires modifying the code segments that are usually set to read-only. To do this, a per arch function is called both before and after the ftrace modifications are performed. The "before" function will set kernel code text to read-write to allow for ftrace to make the modifications, and the "after" function will set the kernel code text back to "read-only" to keep the kernel code text protected. The issue happens when dynamic ftrace is tested at boot up. The test is done before the kernel code text has been set to read-only. But the "before" and "after" calls are still performed. The "after" call will change the kernel code text to read-only prematurely, and other boot code that expects this code to be read-write will fail. The solution is to add a variable that is set when the kernel code text is expected to be converted to read-only, and make the ftrace "before" and "after" calls do nothing if that variable is not yet set. This is similar to the x86 solution from commit 162396309745 ("ftrace, x86: make kernel text writable only for conversions"). Link: http://lkml.kernel.org/r/20180620212906.24b7b66e@vmware.local.home Reported-by: Stefan Agner Tested-by: Stefan Agner Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Russell King Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/init.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c29ad610311b..a9f6705aea23 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -716,19 +716,28 @@ int __mark_rodata_ro(void *unused) return 0; } +static int kernel_set_to_readonly __read_mostly; + void mark_rodata_ro(void) { + kernel_set_to_readonly = 1; stop_machine(__mark_rodata_ro, NULL, NULL); } void set_kernel_text_rw(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false, current->active_mm); } void set_kernel_text_ro(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, current->active_mm); } From bcfa7262bbc0cf7b39ac112ae2ece9f9310ae4d9 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 11 Jul 2018 12:54:54 -0500 Subject: [PATCH 406/519] ARM: dts: am3517.dtsi: Disable reference to OMAP3 OTG controller [ Upstream commit 923847413f7316b5ced3491769b3fefa6c56a79a ] The AM3517 has a different OTG controller location than the OMAP3, which is included from omap3.dtsi. This results in a hwmod error. Since the AM3517 has a different OTG controller address, this patch disabes one that is isn't available. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am3517.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index 5e3f5e86ffcf..cfcbf5baba4f 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -74,6 +74,11 @@ omap3_pmx_core2: pinmux@480025d8 { }; }; +/* Table Table 5-79 of the TRM shows 480ab000 is reserved */ +&usb_otg_hs { + status = "disabled"; +}; + &iva { status = "disabled"; }; From 8ab85f3dc1b45f9189b62c97c82c7e6e1a3de569 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 18 Jun 2018 12:02:00 -0400 Subject: [PATCH 407/519] ixgbe: Be more careful when modifying MAC filters [ Upstream commit d14c780c11fbc10f66c43e7b64eefe87ca442bd3 ] This change makes it so that we are much more explicit about the ordering of updates to the receive address register (RAR) table. Prior to this patch I believe we may have been updating the table while entries were still active, or possibly allowing for reordering of things since we weren't explicitly flushing writes to either the lower or upper portion of the register prior to accessing the other half. Signed-off-by: Alexander Duyck Reviewed-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 105dd00ddc1a..cd2afe92f1da 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1814,7 +1814,12 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, if (enable_addr != 0) rar_high |= IXGBE_RAH_AV; + /* Record lower 32 bits of MAC address and then make + * sure that write is flushed to hardware before writing + * the upper 16 bits and setting the valid bit. + */ IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low); + IXGBE_WRITE_FLUSH(hw); IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); return 0; @@ -1846,8 +1851,13 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); - IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); + /* Clear the address valid bit and upper 16 bits of the address + * before clearing the lower bits. This way we aren't updating + * a live filter. + */ IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); /* clear VMDq pool/queue selection for this RAR */ hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL); From 01a8ef2f327a6fe5075ee5027c9fa02df42c1c4e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Jul 2018 12:00:45 -0400 Subject: [PATCH 408/519] packet: reset network header if packet shorter than ll reserved space [ Upstream commit 993675a3100b16a4c80dfd70cbcde8ea7127b31d ] If variable length link layer headers result in a packet shorter than dev->hard_header_len, reset the network header offset. Else skb->mac_len may exceed skb->len after skb_mac_reset_len. packet_sendmsg_spkt already has similar logic. Fixes: b84bbaf7a6c8 ("packet: in packet_snd start writing at link layer allocation") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3a63f33698d3..8de4e6620c1c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2780,6 +2780,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); + if (len < reserve) + skb_reset_network_header(skb); } /* Returns -EFAULT on error */ From 7795ce1182d5317688750126958954e5d32e3eac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jul 2018 15:23:45 +0300 Subject: [PATCH 409/519] qlogic: check kstrtoul() for errors [ Upstream commit 5fc853cc01c68f84984ecc2d5fd777ecad78240f ] We accidentally left out the error handling for kstrtoul(). Fixes: a520030e326a ("qlcnic: Implement flash sysfs callback for 83xx adapter") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index ccbb04503b27..b53a18e365c2 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -1128,6 +1128,8 @@ static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp, struct qlcnic_adapter *adapter = dev_get_drvdata(dev); ret = kstrtoul(buf, 16, &data); + if (ret) + return ret; switch (data) { case QLC_83XX_FLASH_SECTOR_ERASE_CMD: From 43707aa8c55fb165a1a56f590e0defb198ebdde9 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jul 2018 06:04:53 -0700 Subject: [PATCH 410/519] tcp: remove DELAYED ACK events in DCTCP [ Upstream commit a69258f7aa2623e0930212f09c586fd06674ad79 ] After fixing the way DCTCP tracking delayed ACKs, the delayed-ACK related callbacks are no longer needed Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 -- net/ipv4/tcp_dctcp.c | 25 ------------------------- net/ipv4/tcp_output.c | 4 ---- 3 files changed, 31 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cac4a6ad5db3..6c89238f192e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -821,8 +821,6 @@ enum tcp_ca_event { CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ - CA_EVENT_DELAYED_ACK, /* Delayed ack is sent */ - CA_EVENT_NON_DELAYED_ACK, }; /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 6300edf90e60..62f90f6b7a9d 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -55,7 +55,6 @@ struct dctcp { u32 dctcp_alpha; u32 next_seq; u32 ce_state; - u32 delayed_ack_reserved; u32 loss_cwnd; }; @@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); - ca->delayed_ack_reserved = 0; ca->loss_cwnd = 0; ca->ce_state = 0; @@ -230,25 +228,6 @@ static void dctcp_state(struct sock *sk, u8 new_state) } } -static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev) -{ - struct dctcp *ca = inet_csk_ca(sk); - - switch (ev) { - case CA_EVENT_DELAYED_ACK: - if (!ca->delayed_ack_reserved) - ca->delayed_ack_reserved = 1; - break; - case CA_EVENT_NON_DELAYED_ACK: - if (ca->delayed_ack_reserved) - ca->delayed_ack_reserved = 0; - break; - default: - /* Don't care for the rest. */ - break; - } -} - static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) { switch (ev) { @@ -258,10 +237,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ce_state_1_to_0(sk); break; - case CA_EVENT_DELAYED_ACK: - case CA_EVENT_NON_DELAYED_ACK: - dctcp_update_ack_reserved(sk, ev); - break; default: /* Don't care for the rest. */ break; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6fa749ce231f..2d3c9df8d75c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3316,8 +3316,6 @@ void tcp_send_delayed_ack(struct sock *sk) int ato = icsk->icsk_ack.ato; unsigned long timeout; - tcp_ca_event(sk, CA_EVENT_DELAYED_ACK); - if (ato > TCP_DELACK_MIN) { const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ / 2; @@ -3374,8 +3372,6 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) if (sk->sk_state == TCP_CLOSE) return; - tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK); - /* We are not putting this on the write queue, so * tcp_transmit_skb() will set the ownership to this * sock. From 9c8f268dcdd5d3dacf504873861b9f18c70021b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Jul 2018 15:30:56 +0300 Subject: [PATCH 411/519] drm/nouveau/gem: off by one bugs in nouveau_gem_pushbuf_reloc_apply() [ Upstream commit 7f073d011f93e92d4d225526b9ab6b8b0bbd6613 ] The bo array has req->nr_buffers elements so the > should be >= so we don't read beyond the end of the array. Fixes: a1606a9596e5 ("drm/nouveau: new gem pushbuf interface, bump to 0.0.16") Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 495c279da200..ae560f5977fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -602,7 +602,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, struct nouveau_bo *nvbo; uint32_t data; - if (unlikely(r->bo_index > req->nr_buffers)) { + if (unlikely(r->bo_index >= req->nr_buffers)) { NV_PRINTK(err, cli, "reloc bo index invalid\n"); ret = -EINVAL; break; @@ -612,7 +612,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, if (b->presumed.valid) continue; - if (unlikely(r->reloc_bo_index > req->nr_buffers)) { + if (unlikely(r->reloc_bo_index >= req->nr_buffers)) { NV_PRINTK(err, cli, "reloc container bo index invalid\n"); ret = -EINVAL; break; From 0821ddad494b97f0980db1877c4417e7d45c4925 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Jul 2018 21:25:19 -0700 Subject: [PATCH 412/519] net/ethernet/freescale/fman: fix cross-build error [ Upstream commit c133459765fae249ba482f62e12f987aec4376f0 ] CC [M] drivers/net/ethernet/freescale/fman/fman.o In file included from ../drivers/net/ethernet/freescale/fman/fman.c:35: ../include/linux/fsl/guts.h: In function 'guts_set_dmacr': ../include/linux/fsl/guts.h:165:2: error: implicit declaration of function 'clrsetbits_be32' [-Werror=implicit-function-declaration] clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift); ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Madalin Bucur Cc: netdev@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/guts.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h index 84d971ff3fba..5d06e838e650 100644 --- a/include/linux/fsl/guts.h +++ b/include/linux/fsl/guts.h @@ -16,6 +16,7 @@ #define __FSL_GUTS_H__ #include +#include /** * Global Utility Registers. From 8cfe6f3afe83a2768563f718bb57c99ca249cf4c Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 16 Jul 2018 17:58:10 -0500 Subject: [PATCH 413/519] net: usb: rtl8150: demote allmulti message to dev_dbg() [ Upstream commit 3a9b0455062ffb9d2f6cd4473a76e3456f318c9f ] This driver can spam the kernel log with multiple messages of: net eth0: eth0: allmulti set Usually 4 or 8 at a time (probably because of using ConnMan). This message doesn't seem useful, so let's demote it from dev_info() to dev_dbg(). Signed-off-by: David Lechner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/rtl8150.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 39672984dde1..58b1e18fdd64 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -681,7 +681,7 @@ static void rtl8150_set_multicast(struct net_device *netdev) (netdev->flags & IFF_ALLMULTI)) { rx_creg &= 0xfffe; rx_creg |= 0x0002; - dev_info(&netdev->dev, "%s: allmulti set\n", netdev->name); + dev_dbg(&netdev->dev, "%s: allmulti set\n", netdev->name); } else { /* ~RX_MULTICAST, ~RX_PROMISCUOUS */ rx_creg &= 0x00fc; From 8621e69878ba41ed24987a487eaf01a6505223c6 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 18 Jul 2018 08:31:43 +0200 Subject: [PATCH 414/519] net: qca_spi: Avoid packet drop during initial sync [ Upstream commit b2bab426dc715de147f8039a3fccff27d795f4eb ] As long as the synchronization with the QCA7000 isn't finished, we cannot accept packets from the upper layers. So let the SPI thread enable the TX queue after sync and avoid unwanted packet drop. Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index c90ae4d4be7d..7b903af19e57 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -635,7 +635,7 @@ qcaspi_netdev_open(struct net_device *dev) return ret; } - netif_start_queue(qca->net_dev); + /* SPI thread takes care of TX queue */ return 0; } From e77b1523b93cbc8863cfe656ca0c9e82f7ba43c9 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 18 Jul 2018 08:31:44 +0200 Subject: [PATCH 415/519] net: qca_spi: Make sure the QCA7000 reset is triggered [ Upstream commit 711c62dfa6bdb4326ca6c587f295ea5c4f7269de ] In case the SPI thread is not running, a simple reset of sync state won't fix the transmit timeout. We also need to wake up the kernel thread. Signed-off-by: Stefan Wahren Fixes: ed7d42e24eff ("net: qca_spi: fix transmit queue timeout handling") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 7b903af19e57..25f673059c65 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -739,6 +739,9 @@ qcaspi_netdev_tx_timeout(struct net_device *dev) qca->net_dev->stats.tx_errors++; /* Trigger tx queue flush and QCA7000 reset */ qca->sync = QCASPI_SYNC_UNKNOWN; + + if (qca->spi_thread) + wake_up_process(qca->spi_thread); } static int From 780e559aaa6ae4b184d9af4acd0754f8608b3715 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 18 Jul 2018 08:31:45 +0200 Subject: [PATCH 416/519] net: qca_spi: Fix log level if probe fails [ Upstream commit 50973993260a6934f0a00da53d9b746cfbea89ab ] In cases the probing fails the log level of the messages should be an error. Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 25f673059c65..7886a8a5b55b 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -868,22 +868,22 @@ qca_spi_probe(struct spi_device *spi) if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) || (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) { - dev_info(&spi->dev, "Invalid clkspeed: %d\n", - qcaspi_clkspeed); + dev_err(&spi->dev, "Invalid clkspeed: %d\n", + qcaspi_clkspeed); return -EINVAL; } if ((qcaspi_burst_len < QCASPI_BURST_LEN_MIN) || (qcaspi_burst_len > QCASPI_BURST_LEN_MAX)) { - dev_info(&spi->dev, "Invalid burst len: %d\n", - qcaspi_burst_len); + dev_err(&spi->dev, "Invalid burst len: %d\n", + qcaspi_burst_len); return -EINVAL; } if ((qcaspi_pluggable < QCASPI_PLUGGABLE_MIN) || (qcaspi_pluggable > QCASPI_PLUGGABLE_MAX)) { - dev_info(&spi->dev, "Invalid pluggable: %d\n", - qcaspi_pluggable); + dev_err(&spi->dev, "Invalid pluggable: %d\n", + qcaspi_pluggable); return -EINVAL; } @@ -944,8 +944,8 @@ qca_spi_probe(struct spi_device *spi) } if (register_netdev(qcaspi_devs)) { - dev_info(&spi->dev, "Unable to register net device %s\n", - qcaspi_devs->name); + dev_err(&spi->dev, "Unable to register net device %s\n", + qcaspi_devs->name); free_netdev(qcaspi_devs); return -EFAULT; } From 81970da69122fe4bf2af5bb1bb4c7f62d4744e79 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 Jul 2018 18:27:45 -0700 Subject: [PATCH 417/519] tcp: identify cryptic messages as TCP seq # bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e56b8ce363a36fb7b74b80aaa5cc9084f2c908b4 ] Attempt to make cryptic TCP seq number error messages clearer by (1) identifying the source of the message as "TCP", (2) identifying the errors as "seq # bug", and (3) grouping the field identifiers and values by separating them with commas. E.g., the following message is changed from: recvmsg bug 2: copied 73BCB6CD seq 70F17CBE rcvnxt 73BCB9AA fl 0 WARNING: CPU: 2 PID: 1501 at /linux/net/ipv4/tcp.c:1881 tcp_recvmsg+0x649/0xb90 to: TCP recvmsg seq # bug 2: copied 73BCB6CD, seq 70F17CBE, rcvnxt 73BCB9AA, fl 0 WARNING: CPU: 2 PID: 1501 at /linux/net/ipv4/tcp.c:2011 tcp_recvmsg+0x694/0xba0 Suggested-by: 積丹尼 Dan Jacobson Signed-off-by: Randy Dunlap Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a0f0a7db946b..5e162b8ab184 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1659,7 +1659,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), - "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n", + "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags)) break; @@ -1672,7 +1672,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; WARN(!(flags & MSG_PEEK), - "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", + "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } From b84ec04bae905901f5226a67968dabc52ab0c3a6 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 21 Aug 2018 13:31:50 -0700 Subject: [PATCH 418/519] staging: android: ion: check for kref overflow This patch is against 4.4. It does not apply to master due to a large rework of ion in 4.12 which removed the affected functions altogther. 4c23cbff073f3b9b ("staging: android: ion: Remove import interface") Userspace can cause the kref to handles to increment arbitrarily high. Ensure it does not overflow. Signed-off-by: Daniel Rosenberg Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 374f840f31a4..47cb163da9a0 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -15,6 +15,7 @@ * */ +#include #include #include #include @@ -387,6 +388,16 @@ static void ion_handle_get(struct ion_handle *handle) kref_get(&handle->ref); } +/* Must hold the client lock */ +static struct ion_handle *ion_handle_get_check_overflow( + struct ion_handle *handle) +{ + if (atomic_read(&handle->ref.refcount) + 1 == 0) + return ERR_PTR(-EOVERFLOW); + ion_handle_get(handle); + return handle; +} + static int ion_handle_put_nolock(struct ion_handle *handle) { int ret; @@ -433,9 +444,9 @@ static struct ion_handle *ion_handle_get_by_id_nolock(struct ion_client *client, handle = idr_find(&client->idr, id); if (handle) - ion_handle_get(handle); + return ion_handle_get_check_overflow(handle); - return handle ? handle : ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } struct ion_handle *ion_handle_get_by_id(struct ion_client *client, @@ -1202,7 +1213,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd) /* if a handle exists for this buffer just take a reference to it */ handle = ion_handle_lookup(client, buffer); if (!IS_ERR(handle)) { - ion_handle_get(handle); + handle = ion_handle_get_check_overflow(handle); mutex_unlock(&client->lock); goto end; } From 1186a6ea75df00ec27b9cf2c5d0a5e4298739301 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 28 May 2018 13:31:13 +0200 Subject: [PATCH 419/519] KVM: irqfd: fix race between EPOLLHUP and irq_bypass_register_consumer commit 9432a3175770e06cb83eada2d91fac90c977cb99 upstream. A comment warning against this bug is there, but the code is not doing what the comment says. Therefore it is possible that an EPOLLHUP races against irq_bypass_register_consumer. The EPOLLHUP handler schedules irqfd_shutdown, and if that runs soon enough, you get a use-after-free. Reported-by: syzbot Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 1203829316b2..f509cfd37db5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -405,11 +405,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (events & POLLIN) schedule_work(&irqfd->inject); - /* - * do not drop the file until the irqfd is fully initialized, otherwise - * we might race against the POLLHUP - */ - fdput(f); #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS irqfd->consumer.token = (void *)irqfd->eventfd; irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; @@ -423,6 +418,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) #endif srcu_read_unlock(&kvm->irq_srcu, idx); + + /* + * do not drop the file until the irqfd is fully initialized, otherwise + * we might race against the POLLHUP + */ + fdput(f); return 0; fail: From a89f83823b97b6da1ecf7a51184b28822e78cc07 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 2 Aug 2018 00:03:40 -0400 Subject: [PATCH 420/519] ext4: fix spectre gadget in ext4_mb_regular_allocator() commit 1a5d5e5d51e75a5bca67dadbcea8c841934b7b85 upstream. 'ac->ac_g_ex.fe_len' is a user-controlled value which is used in the derivation of 'ac->ac_2order'. 'ac->ac_2order', in turn, is used to index arrays which makes it a potential spectre gadget. Fix this by sanitizing the value assigned to 'ac->ac2_order'. This covers the following accesses found with the help of smatch: * fs/ext4/mballoc.c:1896 ext4_mb_simple_scan_group() warn: potential spectre issue 'grp->bb_counters' [w] (local cap) * fs/ext4/mballoc.c:445 mb_find_buddy() warn: potential spectre issue 'EXT4_SB(e4b->bd_sb)->s_mb_offsets' [r] (local cap) * fs/ext4/mballoc.c:446 mb_find_buddy() warn: potential spectre issue 'EXT4_SB(e4b->bd_sb)->s_mb_maxs' [r] (local cap) Suggested-by: Josh Poimboeuf Signed-off-by: Jeremy Cline Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 75f79ff29ce0..828b4c080c38 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -2144,7 +2145,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * This should tell if fe_len is exactly power of 2 */ if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) - ac->ac_2order = i - 1; + ac->ac_2order = array_index_nospec(i - 1, + sb->s_blocksize_bits + 2); } /* if stream allocation is enabled, use global goal */ From 49b3acf7ed1997af70ab95d95995eb2a1a6fdf93 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 12 Aug 2018 16:38:03 -0400 Subject: [PATCH 421/519] parisc: Remove ordered stores from syscall.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7797167ffde1f00446301cb22b37b7c03194cfaf upstream. Now that we use a sync prior to releasing the locks in syscall.S, we don't need the PA 2.0 ordered stores used to release some locks.  Using an ordered store, potentially slows the release and subsequent code. There are a number of other ordered stores and loads that serve no purpose.  I have converted these to normal stores. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscall.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index f68eedc72484..dd44022c3ae3 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -627,12 +627,12 @@ cas_action: stw %r1, 4(%sr2,%r20) #endif /* The load and store could fail */ -1: ldw,ma 0(%r26), %r28 +1: ldw 0(%r26), %r28 sub,<> %r28, %r25, %r0 -2: stw,ma %r24, 0(%r26) +2: stw %r24, 0(%r26) /* Free lock */ sync - stw,ma %r20, 0(%sr2,%r20) + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -796,30 +796,30 @@ cas2_action: ldo 1(%r0),%r28 /* 8bit CAS */ -13: ldb,ma 0(%r26), %r29 +13: ldb 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -14: stb,ma %r24, 0(%r26) +14: stb %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 16bit CAS */ -15: ldh,ma 0(%r26), %r29 +15: ldh 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -16: sth,ma %r24, 0(%r26) +16: sth %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 32bit CAS */ -17: ldw,ma 0(%r26), %r29 +17: ldw 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -18: stw,ma %r24, 0(%r26) +18: stw %r24, 0(%r26) b cas2_end copy %r0, %r28 nop @@ -827,10 +827,10 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT -19: ldd,ma 0(%r26), %r29 +19: ldd 0(%r26), %r29 sub,*= %r29, %r25, %r0 b,n cas2_end -20: std,ma %r24, 0(%r26) +20: std %r24, 0(%r26) copy %r0, %r28 #else /* Compare first word */ @@ -849,7 +849,7 @@ cas2_action: cas2_end: /* Free lock */ sync - stw,ma %r20, 0(%sr2,%r20) + stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ From 3e6170d014af6d3e9608987a0dee6e7f01c074b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Jun 2018 21:35:07 -0700 Subject: [PATCH 422/519] xfrm_user: prevent leaking 2 bytes of kernel memory commit 45c180bc29babbedd6b8c01b975780ef44d9d09c upstream. struct xfrm_userpolicy_type has two holes, so we should not use C99 style initializer. KMSAN report: BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:140 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571 CPU: 1 PID: 4520 Comm: syz-executor841 Not tainted 4.17.0+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1211 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253 copyout lib/iov_iter.c:140 [inline] _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571 copy_to_iter include/linux/uio.h:106 [inline] skb_copy_datagram_iter+0x422/0xfa0 net/core/datagram.c:431 skb_copy_datagram_msg include/linux/skbuff.h:3268 [inline] netlink_recvmsg+0x6f1/0x1900 net/netlink/af_netlink.c:1959 sock_recvmsg_nosec net/socket.c:802 [inline] sock_recvmsg+0x1d6/0x230 net/socket.c:809 ___sys_recvmsg+0x3fe/0x810 net/socket.c:2279 __sys_recvmmsg+0x58e/0xe30 net/socket.c:2391 do_sys_recvmmsg+0x2a6/0x3e0 net/socket.c:2472 __do_sys_recvmmsg net/socket.c:2485 [inline] __se_sys_recvmmsg net/socket.c:2481 [inline] __x64_sys_recvmmsg+0x15d/0x1c0 net/socket.c:2481 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x446ce9 RSP: 002b:00007fc307918db8 EFLAGS: 00000293 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006dbc24 RCX: 0000000000446ce9 RDX: 000000000000000a RSI: 0000000020005040 RDI: 0000000000000003 RBP: 00000000006dbc20 R08: 0000000020004e40 R09: 0000000000000000 R10: 0000000040000000 R11: 0000000000000293 R12: 0000000000000000 R13: 00007ffc8d2df32f R14: 00007fc3079199c0 R15: 0000000000000001 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] kmsan_save_stack mm/kmsan/kmsan.c:294 [inline] kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685 kmsan_memcpy_origins+0x11d/0x170 mm/kmsan/kmsan.c:527 __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:413 __nla_put lib/nlattr.c:569 [inline] nla_put+0x276/0x340 lib/nlattr.c:627 copy_to_user_policy_type net/xfrm/xfrm_user.c:1678 [inline] dump_one_policy+0xbe1/0x1090 net/xfrm/xfrm_user.c:1708 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013 xfrm_dump_policy+0x1c0/0x2a0 net/xfrm/xfrm_user.c:1749 netlink_dump+0x9b5/0x1550 net/netlink/af_netlink.c:2226 __netlink_dump_start+0x1131/0x1270 net/netlink/af_netlink.c:2323 netlink_dump_start include/linux/netlink.h:214 [inline] xfrm_user_rcv_msg+0x8a3/0x9b0 net/xfrm/xfrm_user.c:2577 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448 xfrm_netlink_rcv+0xb2/0xf0 net/xfrm/xfrm_user.c:2598 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Local variable description: ----upt.i@dump_one_policy Variable was created at: dump_one_policy+0x78/0x1090 net/xfrm/xfrm_user.c:1689 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013 Byte 130 of 137 is uninitialized Memory access starts at ffff88019550407f Fixes: c0144beaeca42 ("[XFRM] netlink: Use nla_put()/NLA_PUT() variantes") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 90270d7110a3..78c40bb681b9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1624,9 +1624,11 @@ static inline size_t userpolicy_type_attrsize(void) #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt = { - .type = type, - }; + struct xfrm_userpolicy_type upt; + + /* Sadly there are two holes in struct xfrm_userpolicy_type */ + memset(&upt, 0, sizeof(upt)); + upt.type = type; return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } From 76cb5cc66114d2758796198fca7f3387a6f24b75 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 17 Jul 2018 21:03:15 +0200 Subject: [PATCH 423/519] netfilter: conntrack: dccp: treat SYNC/SYNCACK as invalid if no prior state commit 6613b6173dee098997229caf1f3b961c49da75e6 upstream. When first DCCP packet is SYNC or SYNCACK, we insert a new conntrack that has an un-initialized timeout value, i.e. such entry could be reaped at any time. Mark them as INVALID and only ignore SYNC/SYNCACK when connection had an old state. Reported-by: syzbot+6f18401420df260e37ed@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_proto_dccp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index fce1b1cca32d..99d0e9261a64 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -244,14 +244,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, [CT_DCCP_ROLE_SERVER] = { @@ -372,14 +372,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, }; From 62c4e369c9b98480a4b75b3a74a962a6b298120b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 6 Aug 2018 10:38:34 -0400 Subject: [PATCH 424/519] packet: refine ring v3 block size test to hold one frame commit 4576cd469d980317c4edd9173f8b694aa71ea3a3 upstream. TPACKET_V3 stores variable length frames in fixed length blocks. Blocks must be able to store a block header, optional private space and at least one minimum sized frame. Frames, even for a zero snaplen packet, store metadata headers and optional reserved space. In the block size bounds check, ensure that the frame of the chosen configuration fits. This includes sockaddr_ll and optional tp_reserve. Syzbot was able to construct a ring with insuffient room for the sockaddr_ll in the header of a zero-length frame, triggering an out-of-bounds write in dev_parse_header. Convert the comparison to less than, as zero is a valid snap len. This matches the test for minimum tp_frame_size immediately below. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Fixes: eb73190f4fbe ("net/packet: refine check for priv area size") Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8de4e6620c1c..07668f152a3a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4176,6 +4176,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } if (req->tp_block_nr) { + unsigned int min_frame_size; + /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) @@ -4198,12 +4200,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; + min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && - req->tp_block_size <= - BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr)) + req->tp_block_size < + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; - if (unlikely(req->tp_frame_size < po->tp_hdrlen + - po->tp_reserve)) + if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; From 6d124ea608ac800f46100741f7ccd79791c061c8 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:04 +0100 Subject: [PATCH 425/519] bridge: Propagate vlan add failure to user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 08474cc1e6ea71237cab7e4a651a623c9dea1084 upstream. Disallow adding interfaces to a bridge when vlan filtering operation failed. Send the failure code to the user. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: SZ Lin (林上智) Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_if.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 3400b1e47668..50e84e634dfe 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -511,8 +511,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); - if (nbp_vlan_init(p)) + err = nbp_vlan_init(p); + if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); + goto err6; + } spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); @@ -533,6 +536,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; +err6: + list_del_rcu(&p->list); + br_fdb_delete_by_port(br, p, 0, 1); + nbp_update_port_count(br); + netdev_upper_dev_unlink(dev, br->dev); + err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); From 400db6fe74317d64c920025ed4de2de7b3522230 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 12 Aug 2018 16:31:17 -0400 Subject: [PATCH 426/519] parisc: Remove unnecessary barriers from spinlock.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b885ac1dc35b87a39ee176a6c7e2af9c789d8b8 upstream. Now that mb() is an instruction barrier, it will slow performance if we issue unnecessary barriers. The spinlock defines have a number of unnecessary barriers.  The __ldcw() define is both a hardware and compiler barrier.  The mb() barriers in the routines using __ldcw() serve no purpose. The only barrier needed is the one in arch_spin_unlock().  We need to ensure all accesses are complete prior to releasing the lock. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/spinlock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 64f2992e439f..617efa845054 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -21,7 +21,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x, { volatile unsigned int *a; - mb(); a = __ldcw_align(x); while (__ldcw(a) == 0) while (*a == 0) @@ -31,16 +30,15 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x, local_irq_disable(); } else cpu_relax(); - mb(); } static inline void arch_spin_unlock(arch_spinlock_t *x) { volatile unsigned int *a; - mb(); + a = __ldcw_align(x); - *a = 1; mb(); + *a = 1; } static inline int arch_spin_trylock(arch_spinlock_t *x) @@ -48,10 +46,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x) volatile unsigned int *a; int ret; - mb(); a = __ldcw_align(x); ret = __ldcw(a) != 0; - mb(); return ret; } From 8837163ebeba0ab5cd82d8eb284060e0e3cb4a35 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Jul 2018 17:27:31 -0500 Subject: [PATCH 427/519] PCI: hotplug: Don't leak pci_slot on registration failure commit 4ce6435820d1f1cc2c2788e232735eb244bcc8a3 upstream. If addition of sysfs files fails on registration of a hotplug slot, the struct pci_slot as well as the entry in the slot_list is leaked. The issue has been present since the hotplug core was introduced in 2002: https://git.kernel.org/tglx/history/c/a8a2069f432c Perhaps the idea was that even though sysfs addition fails, the slot should still be usable. But that's not how drivers use the interface, they abort probe if a non-zero value is returned. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.4.15+ Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pci_hotplug_core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index d1fab97d6b01..6ce2a73fe0e4 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -457,8 +457,17 @@ int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, list_add(&slot->slot_list, &pci_hotplug_slot_list); result = fs_add_slot(pci_slot); + if (result) + goto err_list_del; + kobject_uevent(&pci_slot->kobj, KOBJ_ADD); dbg("Added slot %s to the list\n", name); + goto out; + +err_list_del: + list_del(&slot->slot_list); + pci_slot->hotplug = NULL; + pci_destroy_slot(pci_slot); out: mutex_unlock(&pci_hp_mutex); return result; From cc7614a5e8ec4514aa27ee3874ad05a1057e644d Mon Sep 17 00:00:00 2001 From: Myron Stowe Date: Mon, 13 Aug 2018 12:19:39 -0600 Subject: [PATCH 428/519] PCI: Skip MPS logic for Virtual Functions (VFs) commit 3dbe97efe8bf450b183d6dee2305cbc032e6b8a4 upstream. PCIe r4.0, sec 9.3.5.4, "Device Control Register", shows both Max_Payload_Size (MPS) and Max_Read_request_Size (MRRS) to be 'RsvdP' for VFs. Just prior to the table it states: "PF and VF functionality is defined in Section 7.5.3.4 except where noted in Table 9-16. For VF fields marked 'RsvdP', the PF setting applies to the VF." All of which implies that with respect to Max_Payload_Size Supported (MPSS), MPS, and MRRS values, we should not be paying any attention to the VF's fields, but rather only to the PF's. Only looking at the PF's fields also logically makes sense as it's the sole physical interface to the PCIe bus. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200527 Fixes: 27d868b5e6cf ("PCI: Set MPS to match upstream bridge") Signed-off-by: Myron Stowe Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.3+ Cc: Keith Busch Cc: Sinan Kaya Cc: Dongdong Liu Cc: Jon Mason Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 566897f24dee..5f040619393f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1338,6 +1338,10 @@ static void pci_configure_mps(struct pci_dev *dev) if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge)) return; + /* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */ + if (dev->is_virtfn) + return; + mps = pcie_get_mps(dev); p_mps = pcie_get_mps(bridge); From 131412f4f6f52b72c3a099c9cdac5d9c6034c76c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Jul 2018 17:27:32 -0500 Subject: [PATCH 429/519] PCI: pciehp: Fix use-after-free on unplug commit 281e878eab191cce4259abbbf1a0322e3adae02c upstream. When pciehp is unbound (e.g. on unplug of a Thunderbolt device), the hotplug_slot struct is deregistered and thus freed before freeing the IRQ. The IRQ handler and the work items it schedules print the slot name referenced from the freed structure in various informational and debug log messages, each time resulting in a quadruple dereference of freed pointers (hotplug_slot -> pci_slot -> kobject -> name). At best the slot name is logged as "(null)", at worst kernel memory is exposed in logs or the driver crashes: pciehp 0000:10:00.0:pcie204: Slot((null)): Card not present An attacker may provoke the bug by unplugging multiple devices on a Thunderbolt daisy chain at once. Unplugging can also be simulated by powering down slots via sysfs. The bug is particularly easy to trigger in poll mode. It has been present since the driver's introduction in 2004: https://git.kernel.org/tglx/history/c/c16b4b14d980 Fix by rearranging teardown such that the IRQ is freed first. Run the work items queued by the IRQ handler to completion before freeing the hotplug_slot struct by draining the work queue from the ->release_slot callback which is invoked by pci_hp_deregister(). Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.6.4 Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_core.c | 7 +++++++ drivers/pci/hotplug/pciehp_hpc.c | 5 ++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index cbe58480b474..6b0f7e0d7dbd 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -132,6 +132,7 @@ int pciehp_unconfigure_device(struct slot *p_slot); void pciehp_queue_pushbutton_work(struct work_struct *work); struct controller *pcie_init(struct pcie_device *dev); int pcie_init_notification(struct controller *ctrl); +void pcie_shutdown_notification(struct controller *ctrl); int pciehp_enable_slot(struct slot *p_slot); int pciehp_disable_slot(struct slot *p_slot); void pcie_reenable_notification(struct controller *ctrl); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 8f6ded43760a..47cc3568514e 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -77,6 +77,12 @@ static int reset_slot (struct hotplug_slot *slot, int probe); */ static void release_slot(struct hotplug_slot *hotplug_slot) { + struct slot *slot = hotplug_slot->private; + + /* queued work needs hotplug_slot name */ + cancel_delayed_work(&slot->work); + drain_workqueue(slot->wq); + kfree(hotplug_slot->ops); kfree(hotplug_slot->info); kfree(hotplug_slot); @@ -276,6 +282,7 @@ static void pciehp_remove(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); + pcie_shutdown_notification(ctrl); cleanup_slot(ctrl); pciehp_release_ctrl(ctrl); } diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 63c6c7fce3eb..cd982778a6b8 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -741,7 +741,7 @@ int pcie_init_notification(struct controller *ctrl) return 0; } -static void pcie_shutdown_notification(struct controller *ctrl) +void pcie_shutdown_notification(struct controller *ctrl) { if (ctrl->notification_enabled) { pcie_disable_notification(ctrl); @@ -776,7 +776,7 @@ static int pcie_init_slot(struct controller *ctrl) static void pcie_cleanup_slot(struct controller *ctrl) { struct slot *slot = ctrl->slot; - cancel_delayed_work(&slot->work); + destroy_workqueue(slot->wq); kfree(slot); } @@ -853,7 +853,6 @@ struct controller *pcie_init(struct pcie_device *dev) void pciehp_release_ctrl(struct controller *ctrl) { - pcie_shutdown_notification(ctrl); pcie_cleanup_slot(ctrl); kfree(ctrl); } From 6e57e6c67fd4b568b180fdbd5c14043d39fe6cda Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Thu, 16 Aug 2018 10:43:12 +0200 Subject: [PATCH 430/519] i2c: imx: Fix race condition in dma read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bed4ff1ed4d8f2ef5007c5c6ae1b29c5677a3632 upstream. This fixes a race condition, where the DMAEN bit ends up being set after I2C slave has transmitted a byte following the dummy read. When that happens, an interrupt is generated instead, and no DMA request is generated to kickstart the DMA read, and a timeout happens after DMA_TIMEOUT (1 sec). Fixed by setting the DMAEN bit before the dummy read. Signed-off-by: Esben Haabendal Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index a4abf7dc9576..cf1b57a054d0 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -677,9 +677,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, struct imx_i2c_dma *dma = i2c_imx->dma; struct device *dev = &i2c_imx->adapter.dev; - temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); - temp |= I2CR_DMAEN; - imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); dma->chan_using = dma->chan_rx; dma->dma_transfer_dir = DMA_DEV_TO_MEM; @@ -792,6 +789,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; + int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -818,12 +816,14 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo */ if ((msgs->len - 1) || block_data) temp &= ~I2CR_TXAK; + if (use_dma) + temp |= I2CR_DMAEN; imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); /* dummy read */ dev_dbg(&i2c_imx->adapter.dev, "<%s> read data\n", __func__); - if (i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data) + if (use_dma) return i2c_imx_dma_read(i2c_imx, msgs, is_lastmsg); /* read data */ From 712254045c02edf3dc21714337a23bf361d0c5ee Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 21 Aug 2018 21:59:37 -0700 Subject: [PATCH 431/519] reiserfs: fix broken xattr handling (heap corruption, bad retval) commit a13f085d111e90469faf2d9965eb39b11c114d7e upstream. This fixes the following issues: - When a buffer size is supplied to reiserfs_listxattr() such that each individual name fits, but the concatenation of all names doesn't fit, reiserfs_listxattr() overflows the supplied buffer. This leads to a kernel heap overflow (verified using KASAN) followed by an out-of-bounds usercopy and is therefore a security bug. - When a buffer size is supplied to reiserfs_listxattr() such that a name doesn't fit, -ERANGE should be returned. But reiserfs instead just truncates the list of names; I have verified that if the only xattr on a file has a longer name than the supplied buffer length, listxattr() incorrectly returns zero. With my patch applied, -ERANGE is returned in both cases and the memory corruption doesn't happen anymore. Credit for making me clean this code up a bit goes to Al Viro, who pointed out that the ->actor calling convention is suboptimal and should be changed. Link: http://lkml.kernel.org/r/20180802151539.5373-1-jannh@google.com Fixes: 48b32a3553a5 ("reiserfs: use generic xattr handlers") Signed-off-by: Jann Horn Acked-by: Jeff Mahoney Cc: Eric Biggers Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index a8dbc93e45eb..8b32fdaad468 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -791,8 +791,10 @@ static int listxattr_filler(struct dir_context *ctx, const char *name, size = handler->list(handler, b->dentry, b->buf + b->pos, b->size, name, namelen); - if (size > b->size) + if (b->pos + size > b->size) { + b->pos = -ERANGE; return -ERANGE; + } } else { size = handler->list(handler, b->dentry, NULL, 0, name, namelen); From 0c73169690eb1d7d6f72a128a010bd84343e503a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Aug 2018 13:27:02 +0200 Subject: [PATCH 432/519] Linux 4.4.152 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 04199cf99dd5..523b0d4354fb 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 151 +SUBLEVEL = 152 EXTRAVERSION = NAME = Blurry Fish Butt From adaba23ccd7d1625942f2c27612d2b416c87e011 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Aug 2018 06:50:15 -0700 Subject: [PATCH 433/519] x86/mm/pat: Fix L1TF stable backport for CPA Patch for stable only to fix boot resets caused by the L1TF patches. Stable trees reverted the following patch Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers" This reverts commit 87e2bd898d3a79a8c609f183180adac47879a2a4 which is commit edc3b9129cecd0f0857112136f5b8b1bc1d45918 upstream. but the L1TF patch backported here x86/mm/pat: Make set_memory_np() L1TF safe commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream set_memory_np() is used to mark kernel mappings not present, but it has it's own open coded mechanism which does not have the L1TF protection of inverting the address bits. assumed that cpa->pfn contains a PFN. With the above patch reverted it does not, which causes the PMD to be set to an incorrect address shifted by 12 bits, which can cause early boot reset on some systems, like an Apollo Lake embedded system. Convert the address to a PFN before passing it to pmd_pfn() Thanks to Bernhard for bisecting and testing. Cc: stable@vger.kernel.org # 4.4 and 4.9 Reported-by: Bernhard Kaindl Tested-by: Bernhard Kaindl Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 27610c2d1821..1007fa80f5a6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1006,7 +1006,7 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn, + set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn >> PAGE_SHIFT, canon_pgprot(pmd_pgprot)))); start += PMD_SIZE; From f9866720724db8a163cf305fc907cdab0b38fa09 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 10:50:29 -0700 Subject: [PATCH 434/519] x86/mm: Fix use-after-free of ldt_struct commit ccd5b3235180eef3cfec337df1c8554ab151b5cc upstream. The following commit: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") renamed init_new_context() to init_new_context_ldt() and added a new init_new_context() which calls init_new_context_ldt(). However, the error code of init_new_context_ldt() was ignored. Consequently, if a memory allocation in alloc_ldt_struct() failed during a fork(), the ->context.ldt of the new task remained the same as that of the old task (due to the memcpy() in dup_mm()). ldt_struct's are not intended to be shared, so a use-after-free occurred after one task exited. Fix the bug by making init_new_context() pass through the error code of init_new_context_ldt(). This bug was found by syzkaller, which encountered the following splat: BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710 CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] exec_mmap fs/exec.c:1061 [inline] flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291 load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855 search_binary_handler+0x142/0x6b0 fs/exec.c:1652 exec_binprm fs/exec.c:1694 [inline] do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816 do_execve+0x31/0x40 fs/exec.c:1860 call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Allocated by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627 kmalloc include/linux/slab.h:493 [inline] alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67 write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277 sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] __mmput kernel/fork.c:916 [inline] mmput+0x541/0x6e0 kernel/fork.c:927 copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931 copy_process kernel/fork.c:1546 [inline] _do_fork+0x1ef/0xfb0 kernel/fork.c:2025 SYSC_clone kernel/fork.c:2135 [inline] SyS_clone+0x37/0x50 kernel/fork.c:2129 do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287 return_from_SYSCALL_64+0x0/0x7a Here is a C reproducer: #include #include #include #include #include #include #include static void *fork_thread(void *_arg) { fork(); } int main(void) { struct user_desc desc = { .entry_number = 8191 }; syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); for (;;) { if (fork() == 0) { pthread_t t; srand(getpid()); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } Note: the reproducer takes advantage of the fact that alloc_ldt_struct() may use vmalloc() to allocate a large ->entries array, and after commit: 5d17a73a2ebe ("vmalloc: back off when the current task is killed") it is possible for userspace to fail a task's vmalloc() by sending a fatal signal, e.g. via exit_group(). It would be more difficult to reproduce this bug on kernels without that commit. This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y. Signed-off-by: Eric Biggers Acked-by: Dave Hansen Cc: [v4.6+] Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Hellwig Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Michal Hocko Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com Signed-off-by: Ingo Molnar Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index effc12767cbf..d8d19fe99e45 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -109,8 +109,7 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); - init_new_context_ldt(tsk, mm); - return 0; + return init_new_context_ldt(tsk, mm); } static inline void destroy_context(struct mm_struct *mm) { From d5e678942de33a5d8545a8b7c825eb93b57be1a9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 22 Feb 2016 09:28:34 -0500 Subject: [PATCH 435/519] ovl: Ensure upper filesystem supports d_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 45aebeaf4f67468f76bedf62923a576a519a9b68 upstream. In some instances xfs has been created with ftype=0 and there if a file on lower fs is removed, overlay leaves a whiteout in upper fs but that whiteout does not get filtered out and is visible to overlayfs users. And reason it does not get filtered out because upper filesystem does not report file type of whiteout as DT_CHR during iterate_dir(). So it seems to be a requirement that upper filesystem support d_type for overlayfs to work properly. Do this check during mount and fail if d_type is not supported. Suggested-by: Dave Chinner Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi Signed-off-by: SZ Lin (林上智) Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/readdir.c | 37 +++++++++++++++++++++++++++++++++++++ fs/overlayfs/super.c | 15 +++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c319d5eaabcf..28316b292b8a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -163,6 +163,7 @@ extern const struct file_operations ovl_dir_operations; int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); void ovl_cache_free(struct list_head *list); +int ovl_check_d_type_supported(struct path *realpath); /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 299a6e1d6b77..0c59955c4653 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -43,6 +43,7 @@ struct ovl_readdir_data { struct ovl_cache_entry *first_maybe_whiteout; int count; int err; + bool d_type_supported; }; struct ovl_dir_file { @@ -581,3 +582,39 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) } mutex_unlock(&upper->d_inode->i_mutex); } + +static int ovl_check_d_type(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct ovl_readdir_data *rdd = + container_of(ctx, struct ovl_readdir_data, ctx); + + /* Even if d_type is not supported, DT_DIR is returned for . and .. */ + if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) + return 0; + + if (d_type != DT_UNKNOWN) + rdd->d_type_supported = true; + + return 0; +} + +/* + * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values + * if error is encountered. + */ +int ovl_check_d_type_supported(struct path *realpath) +{ + int err; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_check_d_type, + .d_type_supported = false, + }; + + err = ovl_dir_read(realpath, &rdd); + if (err) + return err; + + return rdd.d_type_supported; +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d70208c0de84..2de4e3a7d6e7 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1054,6 +1054,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY; ufs->workdir = NULL; } + + /* + * Upper should support d_type, else whiteouts are visible. + * Given workdir and upper are on same fs, we can do + * iterate_dir() on workdir. + */ + err = ovl_check_d_type_supported(&workpath); + if (err < 0) + goto out_put_workdir; + + if (!err) { + pr_err("overlayfs: upper fs needs to support d_type.\n"); + err = -EINVAL; + goto out_put_workdir; + } } err = -ENOMEM; From 0f9a6d88cd9f3b16a86639bd652202fe27096b18 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 20 May 2016 09:04:26 -0400 Subject: [PATCH 436/519] ovl: Do d_type check only if work dir creation was successful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 21765194cecf2e4514ad75244df459f188140a0f upstream. d_type check requires successful creation of workdir as iterates through work dir and expects work dir to be present in it. If that's not the case, this check will always return d_type not supported even if underlying filesystem might be supporting it. So don't do this check if work dir creation failed in previous step. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi Signed-off-by: SZ Lin (林上智) Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 2de4e3a7d6e7..fd21c5f74fba 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1058,16 +1058,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* * Upper should support d_type, else whiteouts are visible. * Given workdir and upper are on same fs, we can do - * iterate_dir() on workdir. + * iterate_dir() on workdir. This check requires successful + * creation of workdir in previous step. */ - err = ovl_check_d_type_supported(&workpath); - if (err < 0) - goto out_put_workdir; + if (ufs->workdir) { + err = ovl_check_d_type_supported(&workpath); + if (err < 0) + goto out_put_workdir; - if (!err) { - pr_err("overlayfs: upper fs needs to support d_type.\n"); - err = -EINVAL; - goto out_put_workdir; + if (!err) { + pr_err("overlayfs: upper fs needs to support d_type.\n"); + err = -EINVAL; + goto out_put_workdir; + } } } From 7eaa995c75bd23b57163541c3285a2c984018b7e Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 1 Jul 2016 10:02:44 -0400 Subject: [PATCH 437/519] ovl: warn instead of error if d_type is not supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e7c0b5991dd1be7b6f6dc2b54a15a0f47b64b007 upstream. overlay needs underlying fs to support d_type. Recently I put in a patch in to detect this condition and started failing mount if underlying fs did not support d_type. But this breaks existing configurations over kernel upgrade. Those who are running docker (partially broken configuration) with xfs not supporting d_type, are surprised that after kernel upgrade docker does not run anymore. https://github.com/docker/docker/issues/22937#issuecomment-229881315 So instead of erroring out, detect broken configuration and warn about it. This should allow existing docker setups to continue working after kernel upgrade. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi Fixes: 45aebeaf4f67 ("ovl: Ensure upper filesystem supports d_type") Cc: 4.6 Signed-off-by: SZ Lin (林上智) Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fd21c5f74fba..0035cb80ecd1 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1066,11 +1066,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err < 0) goto out_put_workdir; - if (!err) { - pr_err("overlayfs: upper fs needs to support d_type.\n"); - err = -EINVAL; - goto out_put_workdir; - } + /* + * We allowed this configuration and don't want to + * break users over kernel upgrade. So warn instead + * of erroring out. + */ + if (!err) + pr_warn("overlayfs: upper fs needs to support d_type.\n"); } } From 577189c37a844243359afce1c3c94418259fe696 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 28 Aug 2018 07:23:44 +0200 Subject: [PATCH 438/519] Linux 4.4.153 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 523b0d4354fb..208a813be615 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 152 +SUBLEVEL = 153 EXTRAVERSION = NAME = Blurry Fish Butt From 21a24b5db1858867ffafb5ca221b352a232e15eb Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Mon, 4 Sep 2017 13:59:34 +0800 Subject: [PATCH 439/519] sched/sysctl: Check user input value of sysctl_sched_time_avg commit 5ccba44ba118a5000cccc50076b0344632459779 upstream. System will hang if user set sysctl_sched_time_avg to 0: [root@XXX ~]# sysctl kernel.sched_time_avg_ms=0 Stack traceback for pid 0 0xffff883f6406c600 0 0 1 3 R 0xffff883f6406cf50 *swapper/3 ffff883f7ccc3ae8 0000000000000018 ffffffff810c4dd0 0000000000000000 0000000000017800 ffff883f7ccc3d78 0000000000000003 ffff883f7ccc3bf8 ffffffff810c4fc9 ffff883f7ccc3c08 00000000810c5043 ffff883f7ccc3c08 Call Trace: [] ? update_group_capacity+0x110/0x200 [] ? update_sd_lb_stats+0x109/0x600 [] ? find_busiest_group+0x47/0x530 [] ? load_balance+0x194/0x900 [] ? update_rq_clock.part.83+0x1a/0xe0 [] ? rebalance_domains+0x152/0x290 [] ? run_rebalance_domains+0xdc/0x1d0 [] ? __do_softirq+0xfb/0x320 [] ? irq_exit+0x125/0x130 [] ? scheduler_ipi+0x97/0x160 [] ? smp_reschedule_interrupt+0x29/0x30 [] ? reschedule_interrupt+0x6e/0x80 [] ? cpuidle_enter_state+0xcc/0x230 [] ? cpuidle_enter_state+0x9c/0x230 [] ? cpuidle_enter+0x17/0x20 [] ? cpu_startup_entry+0x38c/0x420 [] ? start_secondary+0x173/0x1e0 Because divide-by-zero error happens in function: update_group_capacity() update_cpu_capacity() scale_rt_capacity() { ... total = sched_avg_period() + delta; used = div_u64(avg, total); ... } To fix this issue, check user input value of sysctl_sched_time_avg, keep it unchanged when hitting invalid input, and set the minimum limit of sysctl_sched_time_avg to 1 ms. Reported-by: James Puthukattukaran Signed-off-by: Ethan Zhao Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: efault@gmx.de Cc: ethan.kernel@gmail.com Cc: keescook@chromium.org Cc: mcgrof@kernel.org Cc: Link: http://lkml.kernel.org/r/1504504774-18253-1-git-send-email-ethan.zhao@oracle.com Signed-off-by: Ingo Molnar Cc: Steve Muckle Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 17c59e78661b..66100d1bc3f7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -342,7 +342,8 @@ static struct ctl_table kern_table[] = { .data = &sysctl_sched_time_avg, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "sched_shares_window_ns", From 8ed0ff83f5fd0ca412a52bd71af332598c01ca46 Mon Sep 17 00:00:00 2001 From: "yujuan.qi" Date: Mon, 31 Jul 2017 11:23:01 +0800 Subject: [PATCH 440/519] Cipso: cipso_v4_optptr enter infinite loop commit 40413955ee265a5e42f710940ec78f5450d49149 upstream. in for(),if((optlen > 0) && (optptr[1] == 0)), enter infinite loop. Test: receive a packet which the ip length > 20 and the first byte of ip option is 0, produce this issue Signed-off-by: yujuan.qi Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5f3b81941a6f..5169b9b36b6a 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1593,9 +1593,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) int taglen; for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { - if (optptr[0] == IPOPT_CIPSO) + switch (optptr[0]) { + case IPOPT_CIPSO: return optptr; - taglen = optptr[1]; + case IPOPT_END: + return NULL; + case IPOPT_NOOP: + taglen = 1; + break; + default: + taglen = optptr[1]; + } optlen -= taglen; optptr += taglen; } From 1b8e283f8a48e1a5d34f26dc3e5dcabf9607f503 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 7 Jun 2018 10:11:02 +0300 Subject: [PATCH 441/519] vti6: fix PMTU caching and reporting on xmit [ Upstream commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ] When setting the skb->dst before doing the MTU check, the route PMTU caching and reporting is done on the new dst which is about to be released. Instead, PMTU handling should be done using the original dst. This is aligned with IPv4 VTI. Fixes: ccd740cbc6 ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 40bb7a5e6d47..6aca9a6b2303 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -469,10 +469,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } - skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); - skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; - mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); @@ -487,9 +483,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) htonl(mtu)); } - return -EMSGSIZE; + err = -EMSGSIZE; + goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; + err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); From dbcad9a65dbb5f3efa1391f8e4b67303669f8cc0 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 21 Jun 2018 09:30:47 +0300 Subject: [PATCH 442/519] xfrm: fix missing dst_release() after policy blocking lbcast and multicast [ Upstream commit 8cc88773855f988d6a3bbf102bbd9dd9c828eb81 ] Fix missing dst_release() when local broadcast or multicast traffic is xfrm policy blocked. For IPv4 this results to dst leak: ip_route_output_flow() allocates dst_entry via __ip_route_output_key() and passes it to xfrm_lookup_route(). xfrm_lookup returns ERR_PTR(-EPERM) that is propagated. The dst that was allocated is never released. IPv4 local broadcast testcase: ping -b 192.168.1.255 & sleep 1 ip xfrm policy add src 0.0.0.0/0 dst 192.168.1.255/32 dir out action block IPv4 multicast testcase: ping 224.0.0.1 & sleep 1 ip xfrm policy add src 0.0.0.0/0 dst 224.0.0.1/32 dir out action block For IPv6 the missing dst_release() causes trouble e.g. when used in netns: ip netns add TEST ip netns exec TEST ip link set lo up ip link add dummy0 type dummy ip link set dev dummy0 netns TEST ip netns exec TEST ip addr add fd00::1111 dev dummy0 ip netns exec TEST ip link set dummy0 up ip netns exec TEST ping -6 -c 5 ff02::1%dummy0 & sleep 1 ip netns exec TEST ip xfrm policy add src ::/0 dst ff02::1 dir out action block wait ip netns del TEST After netns deletion we see: [ 258.239097] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 268.279061] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 278.367018] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 288.375259] unregister_netdevice: waiting for lo to become free. Usage count = 2 Fixes: ac37e2515c1a ("xfrm: release dst_orig in case of error in xfrm_lookup()") Signed-off-by: Tommi Rantala Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f9a13b67df5e..e9eecf6f0bff 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2326,6 +2326,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); + if (IS_ERR(dst)) + dst_release(dst_orig); + return dst; } EXPORT_SYMBOL(xfrm_lookup_route); From d9c00c89596a239996dd8659b7f4db5674e51a98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Jun 2018 14:00:07 +0200 Subject: [PATCH 443/519] xfrm: free skb if nlsk pointer is NULL [ Upstream commit 86126b77dcd551ce223e7293bb55854e3df05646 ] nlmsg_multicast() always frees the skb, so in case we cannot call it we must do that ourselves. Fixes: 21ee543edc0dea ("xfrm: fix race between netns cleanup and state expire notification") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 78c40bb681b9..a9b4491a3cc4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -980,10 +980,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, { struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); - if (nlsk) - return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); - else - return -1; + if (!nlsk) { + kfree_skb(skb); + return -EPIPE; + } + + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } static inline size_t xfrm_spdinfo_msgsize(void) From 0dc742d907f8d05f188a284433cf9fb565a61f6d Mon Sep 17 00:00:00 2001 From: "mpubbise@codeaurora.org" Date: Mon, 2 Jul 2018 15:40:14 +0530 Subject: [PATCH 444/519] mac80211: add stations tied to AP_VLANs during hw reconfig [ Upstream commit 19103a4bfb42f320395daa5616ece3e89e759d63 ] As part of hw reconfig, only stations linked to AP interfaces are added back to the driver ignoring those which are tied to AP_VLAN interfaces. It is true that there could be stations tied to the AP_VLAN interface while serving 4addr clients or when using AP_VLAN for VLAN operations; we should be adding these stations back to the driver as part of hw reconfig, failing to do so can cause functional issues. In the case of ath10k driver, the following errors were observed. ath10k_pci : failed to install key for non-existent peer XX:XX:XX:XX:XX:XX Workqueue: events_freezable ieee80211_restart_work [mac80211] (unwind_backtrace) from (show_stack+0x10/0x14) (show_stack) (dump_stack+0x80/0xa0) (dump_stack) (warn_slowpath_common+0x68/0x8c) (warn_slowpath_common) (warn_slowpath_null+0x18/0x20) (warn_slowpath_null) (ieee80211_enable_keys+0x88/0x154 [mac80211]) (ieee80211_enable_keys) (ieee80211_reconfig+0xc90/0x19c8 [mac80211]) (ieee80211_reconfig]) (ieee80211_restart_work+0x8c/0xa0 [mac80211]) (ieee80211_restart_work) (process_one_work+0x284/0x488) (process_one_work) (worker_thread+0x228/0x360) (worker_thread) (kthread+0xd8/0xec) (kthread) (ret_from_fork+0x14/0x24) Also while bringing down the AP VAP, WARN_ONs and errors related to peer removal were observed. ath10k_pci : failed to clear all peer wep keys for vdev 0: -2 ath10k_pci : failed to disassociate station: 8c:fd:f0:0a:8c:f5 vdev 0: -2 (unwind_backtrace) (show_stack+0x10/0x14) (show_stack) (dump_stack+0x80/0xa0) (dump_stack) (warn_slowpath_common+0x68/0x8c) (warn_slowpath_common) (warn_slowpath_null+0x18/0x20) (warn_slowpath_null) (sta_set_sinfo+0xb98/0xc9c [mac80211]) (sta_set_sinfo [mac80211]) (__sta_info_flush+0xf0/0x134 [mac80211]) (__sta_info_flush [mac80211]) (ieee80211_stop_ap+0xe8/0x390 [mac80211]) (ieee80211_stop_ap [mac80211]) (__cfg80211_stop_ap+0xe0/0x3dc [cfg80211]) (__cfg80211_stop_ap [cfg80211]) (cfg80211_stop_ap+0x30/0x44 [cfg80211]) (cfg80211_stop_ap [cfg80211]) (genl_rcv_msg+0x274/0x30c) (genl_rcv_msg) (netlink_rcv_skb+0x58/0xac) (netlink_rcv_skb) (genl_rcv+0x20/0x34) (genl_rcv) (netlink_unicast+0x11c/0x204) (netlink_unicast) (netlink_sendmsg+0x30c/0x370) (netlink_sendmsg) (sock_sendmsg+0x70/0x84) (sock_sendmsg) (___sys_sendmsg.part.3+0x188/0x228) (___sys_sendmsg.part.3) (__sys_sendmsg+0x4c/0x70) (__sys_sendmsg) (ret_fast_syscall+0x0/0x44) These issues got fixed by adding the stations which are tied to AP_VLANs back to the driver. Signed-off-by: Manikanta Pubbisetty Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ec26a84b00e2..2214c77d4172 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2006,7 +2006,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!sta->uploaded) continue; - if (sta->sdata->vif.type != NL80211_IFTYPE_AP) + if (sta->sdata->vif.type != NL80211_IFTYPE_AP && + sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN) continue; for (state = IEEE80211_STA_NOTEXIST; From 8d437bd6db6563b58e861361dc8070558cac6e89 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Sun, 8 Jul 2018 09:57:22 +0000 Subject: [PATCH 445/519] nl80211: Add a missing break in parse_station_flags [ Upstream commit 5cf3006cc81d9aa09a10aa781fc065546b12919d ] I was looking at usually suppressed gcc warnings, [-Wimplicit-fallthrough=] in this case: The code definitely looks like a break is missing here. However I am not able to test the NL80211_IFTYPE_MESH_POINT, nor do I actually know what might be :) So please use this patch with caution and only if you are able to do some testing. Signed-off-by: Bernd Edlinger [johannes: looks obvious enough to apply as is, interesting though that it never seems to have been a problem] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b0b58d1565c2..b07fd8b8b50c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3578,6 +3578,7 @@ static int parse_station_flags(struct genl_info *info, params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHORIZED); + break; default: return -EINVAL; } From f9ddeba81ccdeb6eb85e47929b81cae6dfc3cdfb Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 3 Jul 2018 12:56:03 -0400 Subject: [PATCH 446/519] drm/bridge: adv7511: Reset registers on hotplug [ Upstream commit 5f3417569165a8ee57654217f73e0160312f409c ] The bridge loses its hw state when the cable is unplugged. If we detect this case in the hpd handler, reset its state. Reported-by: Rob Clark Tested-by: Rob Clark Reviewed-by: Archit Taneja Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180703165648.120401-1-seanpaul@chromium.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i2c/adv7511.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c index dba5c0ea0827..c7c243e9b808 100644 --- a/drivers/gpu/drm/i2c/adv7511.c +++ b/drivers/gpu/drm/i2c/adv7511.c @@ -450,6 +450,18 @@ static void adv7511_hpd_work(struct work_struct *work) else status = connector_status_disconnected; + /* + * The bridge resets its registers on unplug. So when we get a plug + * event and we're already supposed to be powered, cycle the bridge to + * restore its state. + */ + if (status == connector_status_connected && + adv7511->connector.status == connector_status_disconnected && + adv7511->powered) { + regcache_mark_dirty(adv7511->regmap); + adv7511_power_on(adv7511); + } + if (adv7511->connector.status != status) { adv7511->connector.status = status; drm_kms_helper_hotplug_event(adv7511->connector.dev); From 982d6c0f1d449492e13ec60f9db8e1acaf7f9e64 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 11 Jul 2018 22:09:52 +0530 Subject: [PATCH 447/519] scsi: libiscsi: fix possible NULL pointer dereference in case of TMF [ Upstream commit a17037e7d59075053b522048742a08ac9500bde8 ] In iscsi_check_tmf_restrictions() task->hdr is dereferenced to print the opcode, it is possible that task->hdr is NULL. There are two cases based on opcode argument: 1. ISCSI_OP_SCSI_CMD - In this case alloc_pdu() is called after iscsi_check_tmf_restrictions() iscsi_prep_scsi_cmd_pdu() -> iscsi_check_tmf_restrictions() -> alloc_pdu(). Transport drivers allocate memory for iSCSI hdr in alloc_pdu() and assign it to task->hdr. In case of TMF task->hdr will be NULL resulting in NULL pointer dereference. 2. ISCSI_OP_SCSI_DATA_OUT - In this case transport driver can free the memory for iSCSI hdr after transmitting the pdu so task->hdr can be NULL or invalid. This patch fixes this issue by removing task->hdr->opcode from the printk statement. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 9f0b00c38658..a74f8fbefd33 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -283,11 +283,11 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) */ if (opcode != ISCSI_OP_SCSI_DATA_OUT) { iscsi_conn_printk(KERN_INFO, conn, - "task [op %x/%x itt " + "task [op %x itt " "0x%x/0x%x] " "rejected.\n", - task->hdr->opcode, opcode, - task->itt, task->hdr_itt); + opcode, task->itt, + task->hdr_itt); return -EACCES; } /* @@ -296,10 +296,10 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) */ if (conn->session->fast_abort) { iscsi_conn_printk(KERN_INFO, conn, - "task [op %x/%x itt " + "task [op %x itt " "0x%x/0x%x] fast abort.\n", - task->hdr->opcode, opcode, - task->itt, task->hdr_itt); + opcode, task->itt, + task->hdr_itt); return -EACCES; } break; From 0f14e3a837623b5634aae4f2d56cf7bbb9accca5 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 11 Apr 2018 17:31:35 +0200 Subject: [PATCH 448/519] drm/imx: imx-ldb: disable LDB on driver bind [ Upstream commit b58262396fabd43dc869b576e3defdd23b32fe94 ] The LVDS signal integrity is only guaranteed when the correct enable sequence (first IPU DI, then LDB) is used. If the LDB display output was active before the imx-drm driver is loaded (like when a bootsplash was active) the DI will be disabled by the full IPU reset we do when loading the driver. The LDB control registers are not part of the IPU range and thus will remain unchanged. This leads to the LDB still being active when the DI is getting enabled, effectively reversing the required enable sequence. Fix this by also disabling the LDB on driver bind. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/imx/imx-ldb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index abacc8f67469..e1226b71cbda 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -526,6 +526,9 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(imx_ldb->regmap); } + /* disable LDB by resetting the control register to POR default */ + regmap_write(imx_ldb->regmap, IOMUXC_GPR2, 0); + imx_ldb->dev = dev; if (of_id) From 0ec60fd99a94996767c455cb34d49e946f7d3687 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 11 Apr 2018 17:31:36 +0200 Subject: [PATCH 449/519] drm/imx: imx-ldb: check if channel is enabled before printing warning [ Upstream commit c80d673b91a6c81d765864e10f2b15110ee900ad ] If the second LVDS channel has been disabled in the DT when using dual-channel mode we should not print a warning. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/imx/imx-ldb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index e1226b71cbda..31ca56e593f5 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -569,14 +569,14 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) if (ret || i < 0 || i > 1) return -EINVAL; + if (!of_device_is_available(child)) + continue; + if (dual && i > 0) { dev_warn(dev, "dual-channel mode, ignoring second output\n"); continue; } - if (!of_device_is_available(child)) - continue; - channel = &imx_ldb->channel[i]; channel->ldb = imx_ldb; channel->chno = i; From 9ffc4d3233da61a674c6e6bff122346e7d7fa679 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 20 Jun 2018 11:54:53 +0800 Subject: [PATCH 450/519] usb: gadget: r8a66597: Fix two possible sleep-in-atomic-context bugs in init_controller() [ Upstream commit 0602088b10a7c0b4e044a810678ef93d7cc5bf48 ] The driver may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16.7 are: [FUNC] msleep drivers/usb/gadget/udc/r8a66597-udc.c, 839: msleep in init_controller drivers/usb/gadget/udc/r8a66597-udc.c, 96: init_controller in r8a66597_usb_disconnect drivers/usb/gadget/udc/r8a66597-udc.c, 93: spin_lock in r8a66597_usb_disconnect [FUNC] msleep drivers/usb/gadget/udc/r8a66597-udc.c, 835: msleep in init_controller drivers/usb/gadget/udc/r8a66597-udc.c, 96: init_controller in r8a66597_usb_disconnect drivers/usb/gadget/udc/r8a66597-udc.c, 93: spin_lock in r8a66597_usb_disconnect To fix these bugs, msleep() is replaced with mdelay(). This bug is found by my static analysis tool (DSAC-2) and checked by my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/r8a66597-udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index baa0609a429d..c7206a3b13c7 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -835,11 +835,11 @@ static void init_controller(struct r8a66597 *r8a66597) r8a66597_bset(r8a66597, XCKE, SYSCFG0); - msleep(3); + mdelay(3); r8a66597_bset(r8a66597, PLLC, SYSCFG0); - msleep(1); + mdelay(1); r8a66597_bset(r8a66597, SCKE, SYSCFG0); From 68f4f658ef0a1d49b13de2ae991b13a93df675e6 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 20 Jun 2018 11:55:08 +0800 Subject: [PATCH 451/519] usb: gadget: r8a66597: Fix a possible sleep-in-atomic-context bugs in r8a66597_queue() [ Upstream commit f36b507c14c4b6e634463a610294e9cb0065c8ea ] The driver may sleep in an interrupt handler. The function call path (from bottom to top) in Linux-4.16.7 is: [FUNC] r8a66597_queue(GFP_KERNEL) drivers/usb/gadget/udc/r8a66597-udc.c, 1193: r8a66597_queue in get_status drivers/usb/gadget/udc/r8a66597-udc.c, 1301: get_status in setup_packet drivers/usb/gadget/udc/r8a66597-udc.c, 1381: setup_packet in irq_control_stage drivers/usb/gadget/udc/r8a66597-udc.c, 1508: irq_control_stage in r8a66597_irq (interrupt handler) To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool (DSAC-2) and checked by my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/r8a66597-udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index c7206a3b13c7..e34094647603 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -1193,7 +1193,7 @@ __acquires(r8a66597->lock) r8a66597->ep0_req->length = 2; /* AV: what happens if we get called again before that gets through? */ spin_unlock(&r8a66597->lock); - r8a66597_queue(r8a66597->gadget.ep0, r8a66597->ep0_req, GFP_KERNEL); + r8a66597_queue(r8a66597->gadget.ep0, r8a66597->ep0_req, GFP_ATOMIC); spin_lock(&r8a66597->lock); } From 5e02503f2a99621059daaf176e970b083648e0bb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Jul 2018 10:37:37 -0700 Subject: [PATCH 452/519] usb/phy: fix PPC64 build errors in phy-fsl-usb.c [ Upstream commit a39ba90a1cc7010edb0a7132e1b67f3d80b994e9 ] Fix build errors when built for PPC64: These variables are only used on PPC32 so they don't need to be initialized for PPC64. ../drivers/usb/phy/phy-fsl-usb.c: In function 'usb_otg_start': ../drivers/usb/phy/phy-fsl-usb.c:865:3: error: '_fsl_readl' undeclared (first use in this function); did you mean 'fsl_readl'? _fsl_readl = _fsl_readl_be; ../drivers/usb/phy/phy-fsl-usb.c:865:16: error: '_fsl_readl_be' undeclared (first use in this function); did you mean 'fsl_readl'? _fsl_readl = _fsl_readl_be; ../drivers/usb/phy/phy-fsl-usb.c:866:3: error: '_fsl_writel' undeclared (first use in this function); did you mean 'fsl_writel'? _fsl_writel = _fsl_writel_be; ../drivers/usb/phy/phy-fsl-usb.c:866:17: error: '_fsl_writel_be' undeclared (first use in this function); did you mean 'fsl_writel'? _fsl_writel = _fsl_writel_be; ../drivers/usb/phy/phy-fsl-usb.c:868:16: error: '_fsl_readl_le' undeclared (first use in this function); did you mean 'fsl_readl'? _fsl_readl = _fsl_readl_le; ../drivers/usb/phy/phy-fsl-usb.c:869:17: error: '_fsl_writel_le' undeclared (first use in this function); did you mean 'fsl_writel'? _fsl_writel = _fsl_writel_le; and the sysfs "show" function return type should be ssize_t, not int: ../drivers/usb/phy/phy-fsl-usb.c:1042:49: error: initialization of 'ssize_t (*)(struct device *, struct device_attribute *, char *)' {aka 'long int (*)(struct device *, struct device_attribute *, char *)'} from incompatible pointer type 'int (*)(struct device *, struct device_attribute *, char *)' [-Werror=incompatible-pointer-types] static DEVICE_ATTR(fsl_usb2_otg_state, S_IRUGO, show_fsl_usb2_otg_state, NULL); Signed-off-by: Randy Dunlap Cc: Felipe Balbi Cc: linux-usb@vger.kernel.org Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-fsl-usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-fsl-usb.c b/drivers/usb/phy/phy-fsl-usb.c index 94eb2923afed..85d031ce85c1 100644 --- a/drivers/usb/phy/phy-fsl-usb.c +++ b/drivers/usb/phy/phy-fsl-usb.c @@ -879,6 +879,7 @@ int usb_otg_start(struct platform_device *pdev) if (pdata->init && pdata->init(pdev) != 0) return -EINVAL; +#ifdef CONFIG_PPC32 if (pdata->big_endian_mmio) { _fsl_readl = _fsl_readl_be; _fsl_writel = _fsl_writel_be; @@ -886,6 +887,7 @@ int usb_otg_start(struct platform_device *pdev) _fsl_readl = _fsl_readl_le; _fsl_writel = _fsl_writel_le; } +#endif /* request irq */ p_otg->irq = platform_get_irq(pdev, 0); @@ -976,7 +978,7 @@ int usb_otg_start(struct platform_device *pdev) /* * state file in sysfs */ -static int show_fsl_usb2_otg_state(struct device *dev, +static ssize_t show_fsl_usb2_otg_state(struct device *dev, struct device_attribute *attr, char *buf) { struct otg_fsm *fsm = &fsl_otg_dev->fsm; From 7b85bc4bea0f1c922315aaf9451c544b69dc20f6 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 10 Jul 2018 16:01:45 +0200 Subject: [PATCH 453/519] tools: usb: ffs-test: Fix build on big endian systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a2b22dddc7bb6110ac3b5ed1a60aa9279836fadb ] The tools/usb/ffs-test.c file defines cpu_to_le16/32 by using the C library htole16/32 function calls. However, cpu_to_le16/32 are used when initializing structures, i.e in a context where a function call is not allowed. It works fine on little endian systems because htole16/32 are defined by the C library as no-ops. But on big-endian systems, they are actually doing something, which might involve calling a function, causing build failures, such as: ffs-test.c:48:25: error: initializer element is not constant #define cpu_to_le32(x) htole32(x) ^~~~~~~ ffs-test.c:128:12: note: in expansion of macro ‘cpu_to_le32’ .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2), ^~~~~~~~~~~ To solve this, we code cpu_to_le16/32 in a way that allows them to be used when initializing structures. This fix was imported from meta-openembedded/android-tools/fix-big-endian-build.patch written by Thomas Petazzoni . CC: Thomas Petazzoni Signed-off-by: Peter Senna Tschudin Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/usb/ffs-test.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c index 88d5e71be044..47dfa0b0fcd7 100644 --- a/tools/usb/ffs-test.c +++ b/tools/usb/ffs-test.c @@ -44,12 +44,25 @@ /******************** Little Endian Handling ********************************/ -#define cpu_to_le16(x) htole16(x) -#define cpu_to_le32(x) htole32(x) +/* + * cpu_to_le16/32 are used when initializing structures, a context where a + * function call is not allowed. To solve this, we code cpu_to_le16/32 in a way + * that allows them to be used when initializing structures. + */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define cpu_to_le16(x) (x) +#define cpu_to_le32(x) (x) +#else +#define cpu_to_le16(x) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)) +#define cpu_to_le32(x) \ + ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \ + (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24)) +#endif + #define le32_to_cpu(x) le32toh(x) #define le16_to_cpu(x) le16toh(x) - /******************** Messages and Errors ***********************************/ static const char argv0[] = "ffs-test"; From 801a35b0963c1d1af581486ac0461ad41cea6a6f Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Mon, 2 Jul 2018 23:46:47 +0200 Subject: [PATCH 454/519] usb: gadget: f_uac2: fix endianness of 'struct cntrl_*_lay3' [ Upstream commit eec24f2a0d4dc3b1d95a3ccd2feb523ede3ba775 ] The list [1] of commits doing endianness fixes in USB subsystem is long due to below quote from USB spec Revision 2.0 from April 27, 2000: ------------ 8.1 Byte/Bit Ordering Multiple byte fields in standard descriptors, requests, and responses are interpreted as and moved over the bus in little-endian order, i.e. LSB to MSB. ------------ This commit belongs to the same family. [1] Example of endianness fixes in USB subsystem: commit 14e1d56cbea6 ("usb: gadget: f_uac2: endianness fixes.") commit 42370b821168 ("usb: gadget: f_uac1: endianness fixes.") commit 63afd5cc7877 ("USB: chaoskey: fix Alea quirk on big-endian hosts") commit 74098c4ac782 ("usb: gadget: acm: fix endianness in notifications") commit cdd7928df0d2 ("ACM gadget: fix endianness in notifications") commit 323ece54e076 ("cdc-wdm: fix endianness bug in debug statements") commit e102609f1072 ("usb: gadget: uvc: Fix endianness mismatches") list goes on Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Signed-off-by: Eugeniu Rosca Reviewed-by: Ruslan Bilovol Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index b5dab103be38..e931c3cb0840 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -941,14 +941,14 @@ static struct usb_descriptor_header *hs_audio_desc[] = { }; struct cntrl_cur_lay3 { - __u32 dCUR; + __le32 dCUR; }; struct cntrl_range_lay3 { - __u16 wNumSubRanges; - __u32 dMIN; - __u32 dMAX; - __u32 dRES; + __le16 wNumSubRanges; + __le32 dMIN; + __le32 dMAX; + __le32 dRES; } __packed; static inline void @@ -1296,9 +1296,9 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr) memset(&c, 0, sizeof(struct cntrl_cur_lay3)); if (entity_id == USB_IN_CLK_ID) - c.dCUR = p_srate; + c.dCUR = cpu_to_le32(p_srate); else if (entity_id == USB_OUT_CLK_ID) - c.dCUR = c_srate; + c.dCUR = cpu_to_le32(c_srate); value = min_t(unsigned, w_length, sizeof c); memcpy(req->buf, &c, value); @@ -1336,15 +1336,15 @@ in_rq_range(struct usb_function *fn, const struct usb_ctrlrequest *cr) if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) { if (entity_id == USB_IN_CLK_ID) - r.dMIN = p_srate; + r.dMIN = cpu_to_le32(p_srate); else if (entity_id == USB_OUT_CLK_ID) - r.dMIN = c_srate; + r.dMIN = cpu_to_le32(c_srate); else return -EOPNOTSUPP; r.dMAX = r.dMIN; r.dRES = 0; - r.wNumSubRanges = 1; + r.wNumSubRanges = cpu_to_le16(1); value = min_t(unsigned, w_length, sizeof r); memcpy(req->buf, &r, value); From 1fa903cb3b09af0c32888fbf8dde4d3def60fb6f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 20 Jul 2018 14:47:03 -0400 Subject: [PATCH 455/519] tools/power turbostat: fix -S on UP systems [ Upstream commit 9d83601a9cc1884d1b5706ee2acc661d558c6838 ] The -S (system summary) option failed to print any data on a 1-processor system. Reported-by: Artem Bityutskiy Signed-off-by: Len Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..36e0d255d1b6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -663,9 +663,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ if (!printed || !summary_only) print_header(); - if (topo.num_cpus > 1) - format_counters(&average.threads, &average.cores, - &average.packages); + format_counters(&average.threads, &average.cores, &average.packages); printed = 1; From 7193329df84d8d346585e22ea1b800b8fa177124 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 19 Jul 2018 10:27:13 +0800 Subject: [PATCH 456/519] net: caif: Add a missing rcu_read_unlock() in caif_flow_cb [ Upstream commit 64119e05f7b31e83e2555f6782e6cdc8f81c63f4 ] Add a missing rcu_read_unlock in the error path Fixes: c95567c80352 ("caif: added check for potential null return") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index d730a0f68f46..a0443d40d677 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -131,8 +131,10 @@ static void caif_flow_cb(struct sk_buff *skb) caifd = caif_get(skb->dev); WARN_ON(caifd == NULL); - if (caifd == NULL) + if (!caifd) { + rcu_read_unlock(); return; + } caifd_hold(caifd); rcu_read_unlock(); From af130c7f1d432510034bdd10332188ec46dcc04c Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 18 Jul 2018 22:50:03 -0700 Subject: [PATCH 457/519] qed: Fix possible race for the link state value. [ Upstream commit 58874c7b246109d8efb2b0099d1aa296d6bfc3fa ] There's a possible race where driver can read link status in mid-transition and see that virtual-link is up yet speed is 0. Since in this mid-transition we're guaranteed to see a mailbox from MFW soon, we can afford to treat this as link down. Fixes: cc875c2e ("qed: Add link support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 20d048cdcb88..c898006abb32 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -420,6 +420,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, break; default: p_link->speed = 0; + p_link->link_up = 0; } /* Correct speed according to bandwidth allocation */ From 405d3fdb6b8671d73377f619e1971ce4fece7a50 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 Jul 2018 19:30:57 +0200 Subject: [PATCH 458/519] atl1c: reserve min skb headroom [ Upstream commit 6e56830776828d8ca9897fc4429eeab47c3bb432 ] Got crash report with following backtrace: BUG: unable to handle kernel paging request at ffff8801869daffe RIP: 0010:[] [] ip6_finish_output2+0x394/0x4c0 RSP: 0018:ffff880186c83a98 EFLAGS: 00010283 RAX: ffff8801869db00e ... [] ip6_finish_output+0x8c/0xf0 [] ip6_output+0x57/0x100 [] ip6_forward+0x4b9/0x840 [] ip6_rcv_finish+0x66/0xc0 [] ipv6_rcv+0x319/0x530 [] netif_receive_skb+0x1c/0x70 [] atl1c_clean+0x1ec/0x310 [atl1c] ... The bad access is in neigh_hh_output(), at skb->data - 16 (HH_DATA_MOD). atl1c driver provided skb with no headroom, so 14 bytes (ethernet header) got pulled, but then 16 are copied. Reserve NET_SKB_PAD bytes headroom, like netdev_alloc_skb(). Compile tested only; I lack hardware. Fixes: 7b7017642199 ("atl1c: Fix misuse of netdev_alloc_skb in refilling rx ring") Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 8b5988e210d5..c08d34f618b9 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -1683,6 +1683,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) skb = build_skb(page_address(page) + adapter->rx_page_offset, adapter->rx_frag_size); if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); adapter->rx_page_offset += adapter->rx_frag_size; if (adapter->rx_page_offset >= PAGE_SIZE) adapter->rx_page = NULL; From 66673aace3d6fb5539c7e4c6c2bb589ffed51dab Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 21 Jul 2018 12:59:25 -0700 Subject: [PATCH 459/519] net: prevent ISA drivers from building on PPC32 [ Upstream commit c9ce1fa1c24b08e13c2a3b5b1f94a19c9eaa982c ] Prevent drivers from building on PPC32 if they use isa_bus_to_virt(), isa_virt_to_bus(), or isa_page_to_bus(), which are not available and thus cause build errors. ../drivers/net/ethernet/3com/3c515.c: In function 'corkscrew_open': ../drivers/net/ethernet/3com/3c515.c:824:9: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration] ../drivers/net/ethernet/amd/lance.c: In function 'lance_rx': ../drivers/net/ethernet/amd/lance.c:1203:23: error: implicit declaration of function 'isa_bus_to_virt'; did you mean 'bus_to_virt'? [-Werror=implicit-function-declaration] ../drivers/net/ethernet/amd/ni65.c: In function 'ni65_init_lance': ../drivers/net/ethernet/amd/ni65.c:585:20: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration] ../drivers/net/ethernet/cirrus/cs89x0.c: In function 'net_open': ../drivers/net/ethernet/cirrus/cs89x0.c:897:20: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration] Signed-off-by: Randy Dunlap Suggested-by: Michael Ellerman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/3com/Kconfig | 2 +- drivers/net/ethernet/amd/Kconfig | 4 ++-- drivers/net/ethernet/cirrus/Kconfig | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index 5b7658bcf020..5c3ef9fc8207 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -32,7 +32,7 @@ config EL3 config 3C515 tristate "3c515 ISA \"Fast EtherLink\"" - depends on ISA && ISA_DMA_API + depends on ISA && ISA_DMA_API && !PPC32 ---help--- If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet network card, say Y here. diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 0038709fd317..ec59425fdbff 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -44,7 +44,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" - depends on ISA && ISA_DMA_API && !ARM + depends on ISA && ISA_DMA_API && !ARM && !PPC32 ---help--- If you have a network (Ethernet) card of this type, say Y here. Some LinkSys cards are of this type. @@ -138,7 +138,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" - depends on ISA && ISA_DMA_API && !ARM + depends on ISA && ISA_DMA_API && !ARM && !PPC32 ---help--- If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index 5ab912937aff..ec0b545197e2 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -19,6 +19,7 @@ if NET_VENDOR_CIRRUS config CS89x0 tristate "CS89x0 support" depends on ISA || EISA || ARM + depends on !PPC32 ---help--- Support for CS89x0 chipset based Ethernet cards. If you have a network (Ethernet) card of this type, say Y and read the file From ae34cbea8c5378ca3b2a423260f4bc80e04a49d7 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 9 Jul 2018 21:16:40 +0200 Subject: [PATCH 460/519] can: mpc5xxx_can: check of_iomap return before use [ Upstream commit b5c1a23b17e563b656cc9bb76ce5323b997d90e8 ] of_iomap() can return NULL so that return needs to be checked and NULL treated as failure. While at it also take care of the missing of_node_put() in the error path. Signed-off-by: Nicholas Mc Guire Fixes: commit afa17a500a36 ("net/can: add driver for mscan family & mpc52xx_mscan") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/mscan/mpc5xxx_can.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index c7427bdd3a4b..2949a381a94d 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -86,6 +86,11 @@ static u32 mpc52xx_can_get_clock(struct platform_device *ofdev, return 0; } cdm = of_iomap(np_cdm, 0); + if (!cdm) { + of_node_put(np_cdm); + dev_err(&ofdev->dev, "can't map clock node!\n"); + return 0; + } if (in_8(&cdm->ipb_clk_sel) & 0x1) freq *= 2; From ca08b42d1f69d4af1037dbb0091120cc583c6d29 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 13 Jul 2018 17:20:17 +0200 Subject: [PATCH 461/519] i2c: davinci: Avoid zero value of CLKH [ Upstream commit cc8de9a68599b261244ea453b38678229f06ada7 ] If CLKH is set to 0 I2C clock is not generated at all, so avoid this value and stretch the clock in this case. Signed-off-by: Alexander Sverdlin Acked-by: Sekhar Nori Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-davinci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index a8bdcb5292f5..57f6eb1427b4 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -234,12 +234,16 @@ static void i2c_davinci_calc_clk_dividers(struct davinci_i2c_dev *dev) /* * It's not always possible to have 1 to 2 ratio when d=7, so fall back * to minimal possible clkh in this case. + * + * Note: + * CLKH is not allowed to be 0, in this case I2C clock is not generated + * at all */ - if (clk >= clkl + d) { + if (clk > clkl + d) { clkh = clk - clkl - d; clkl -= d; } else { - clkh = 0; + clkh = 1; clkl = clk - (d << 1); } From b7e90154dc16d9538067fe7012a14447d341c43a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 23 Jul 2018 14:39:33 -0700 Subject: [PATCH 462/519] media: staging: omap4iss: Include asm/cacheflush.h after generic includes [ Upstream commit 0894da849f145af51bde88a6b84f95b9c9e0bc66 ] Including asm/cacheflush.h first results in the following build error when trying to build sparc32:allmodconfig, because 'struct page' has not been declared, and the function declaration ends up creating a separate (private) declaration of struct page (as a result of function arguments being in the scope of the function declaration and definition, not in global scope). The C scoping rules do not just affect variable visibility, they also affect type declaration visibility. The end result is that when the actual call site is seen in , the 'struct page' type in the caller is not the same 'struct page' that the function was declared with, resulting in: In file included from arch/sparc/include/asm/page.h:10:0, ... from drivers/staging/media/omap4iss/iss_video.c:15: include/linux/highmem.h: In function 'clear_user_highpage': include/linux/highmem.h:137:31: error: passing argument 1 of 'sparc_flush_page_to_ram' from incompatible pointer type Include generic includes files first to fix the problem. Fixes: fc96d58c10162 ("[media] v4l: omap4iss: Add support for OMAP4 camera interface - Video devices") Suggested-by: Linus Torvalds Acked-by: David S. Miller Cc: Randy Dunlap Signed-off-by: Guenter Roeck [ Added explanation of C scope rules - Linus ] Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/omap4iss/iss_video.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c index 2a0158bb4974..5a78ef057635 100644 --- a/drivers/staging/media/omap4iss/iss_video.c +++ b/drivers/staging/media/omap4iss/iss_video.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include #include #include #include @@ -22,6 +21,8 @@ #include #include +#include + #include "iss_video.h" #include "iss.h" From 21aee5fe5b4b399123d28628ca31c7823d532887 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 24 Jul 2018 02:43:52 -0700 Subject: [PATCH 463/519] bnx2x: Fix invalid memory access in rss hash config path. [ Upstream commit ae2dcb28c24794a87e424a726a1cf1a61980f52d ] Rx hash/filter table configuration uses rss_conf_obj to configure filters in the hardware. This object is initialized only when the interface is brought up. This patch adds driver changes to configure rss params only when the device is in opened state. In port disabled case, the config will be cached in the driver structure which will be applied in the successive load path. Please consider applying it to 'net' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index d84efcd34fac..c56b61dce2d1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3360,14 +3360,18 @@ static int bnx2x_set_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info) DP(BNX2X_MSG_ETHTOOL, "rss re-configured, UDP 4-tupple %s\n", udp_rss_requested ? "enabled" : "disabled"); - return bnx2x_rss(bp, &bp->rss_conf_obj, false, true); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_rss(bp, &bp->rss_conf_obj, false, + true); } else if ((info->flow_type == UDP_V6_FLOW) && (bp->rss_conf_obj.udp_rss_v6 != udp_rss_requested)) { bp->rss_conf_obj.udp_rss_v6 = udp_rss_requested; DP(BNX2X_MSG_ETHTOOL, "rss re-configured, UDP 4-tupple %s\n", udp_rss_requested ? "enabled" : "disabled"); - return bnx2x_rss(bp, &bp->rss_conf_obj, false, true); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_rss(bp, &bp->rss_conf_obj, false, + true); } return 0; @@ -3481,7 +3485,10 @@ static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, bp->rss_conf_obj.ind_table[i] = indir[i] + bp->fp->cl_id; } - return bnx2x_config_rss_eth(bp, false); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_config_rss_eth(bp, false); + + return 0; } /** From 7022f61c15dc06efe8d4af4711dac139f2eea121 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Tue, 24 Jul 2018 10:09:53 +0530 Subject: [PATCH 464/519] net: axienet: Fix double deregister of mdio [ Upstream commit 03bc7cab7d7218088412a75e141696a89059ab00 ] If the registration fails then mdio_unregister is called. However at unbind the unregister ia attempted again resulting in the below crash [ 73.544038] kernel BUG at drivers/net/phy/mdio_bus.c:415! [ 73.549362] Internal error: Oops - BUG: 0 [#1] SMP [ 73.554127] Modules linked in: [ 73.557168] CPU: 0 PID: 2249 Comm: sh Not tainted 4.14.0 #183 [ 73.562895] Hardware name: xlnx,zynqmp (DT) [ 73.567062] task: ffffffc879e41180 task.stack: ffffff800cbe0000 [ 73.572973] PC is at mdiobus_unregister+0x84/0x88 [ 73.577656] LR is at axienet_mdio_teardown+0x18/0x30 [ 73.582601] pc : [] lr : [] pstate: 20000145 [ 73.589981] sp : ffffff800cbe3c30 [ 73.593277] x29: ffffff800cbe3c30 x28: ffffffc879e41180 [ 73.598573] x27: ffffff8008a21000 x26: 0000000000000040 [ 73.603868] x25: 0000000000000124 x24: ffffffc879efe920 [ 73.609164] x23: 0000000000000060 x22: ffffffc879e02000 [ 73.614459] x21: ffffffc879e02800 x20: ffffffc87b0b8870 [ 73.619754] x19: ffffffc879e02800 x18: 000000000000025d [ 73.625050] x17: 0000007f9a719ad0 x16: ffffff8008195bd8 [ 73.630345] x15: 0000007f9a6b3d00 x14: 0000000000000010 [ 73.635640] x13: 74656e7265687465 x12: 0000000000000030 [ 73.640935] x11: 0000000000000030 x10: 0101010101010101 [ 73.646231] x9 : 241f394f42533300 x8 : ffffffc8799f6e98 [ 73.651526] x7 : ffffffc8799f6f18 x6 : ffffffc87b0ba318 [ 73.656822] x5 : ffffffc87b0ba498 x4 : 0000000000000000 [ 73.662117] x3 : 0000000000000000 x2 : 0000000000000008 [ 73.667412] x1 : 0000000000000004 x0 : ffffffc8799f4000 [ 73.672708] Process sh (pid: 2249, stack limit = 0xffffff800cbe0000) Fix the same by making the bus NULL on unregister. Signed-off-by: Shubhrajyoti Datta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 507bbb0355c2..f6108413adba 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -218,6 +218,7 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) ret = of_mdiobus_register(bus, np1); if (ret) { mdiobus_free(bus); + lp->mii_bus = NULL; return ret; } return 0; From 58b96dea990fa8d440e55e1ccb55ef568ac5cf52 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Wed, 25 Jul 2018 14:31:20 +0100 Subject: [PATCH 465/519] fscache: Allow cancelled operations to be enqueued [ Upstream commit d0eb06afe712b7b103b6361f40a9a0c638524669 ] Alter the state-check assertion in fscache_enqueue_operation() to allow cancelled operations to be given processing time so they can be cleaned up. Also fix a debugging statement that was requiring such operations to have an object assigned. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Reported-by: Kiran Kumar Modukuri Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/fscache/operation.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index de67745e1cd7..77946d6f617d 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -66,7 +66,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERT(fscache_object_is_available(op->object)); ASSERTCMP(atomic_read(&op->usage), >, 0); - ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, + op->state, ==, FSCACHE_OP_ST_CANCELLED); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -481,7 +482,8 @@ void fscache_put_operation(struct fscache_operation *op) struct fscache_cache *cache; _enter("{OBJ%x OP%x,%d}", - op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + op->object ? op->object->debug_id : 0, + op->debug_id, atomic_read(&op->usage)); ASSERTCMP(atomic_read(&op->usage), >, 0); From 06144b250b7a7981a7e60f5614e13777fea8dae7 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Tue, 18 Jul 2017 16:25:49 -0700 Subject: [PATCH 466/519] cachefiles: Fix refcounting bug in backing-file read monitoring [ Upstream commit 934140ab028713a61de8bca58c05332416d037d1 ] cachefiles_read_waiter() has the right to access a 'monitor' object by virtue of being called under the waitqueue lock for one of the pages in its purview. However, it has no ref on that monitor object or on the associated operation. What it is allowed to do is to move the monitor object to the operation's to_do list, but once it drops the work_lock, it's actually no longer permitted to access that object. However, it is trying to enqueue the retrieval operation for processing - but it can only do this via a pointer in the monitor object, something it shouldn't be doing. If it doesn't enqueue the operation, the operation may not get processed. If the order is flipped so that the enqueue is first, then it's possible for the work processor to look at the to_do list before the monitor is enqueued upon it. Fix this by getting a ref on the operation so that we can trust that it will still be there once we've added the monitor to the to_do list and dropped the work_lock. The op can then be enqueued after the lock is dropped. The bug can manifest in one of a couple of ways. The first manifestation looks like: FS-Cache: FS-Cache: Assertion failed FS-Cache: 6 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:494! RIP: 0010:fscache_put_operation+0x1e3/0x1f0 ... fscache_op_work_func+0x26/0x50 process_one_work+0x131/0x290 worker_thread+0x45/0x360 kthread+0xf8/0x130 ? create_worker+0x190/0x190 ? kthread_cancel_work_sync+0x10/0x10 ret_from_fork+0x1f/0x30 This is due to the operation being in the DEAD state (6) rather than INITIALISED, COMPLETE or CANCELLED (5) because it's already passed through fscache_put_operation(). The bug can also manifest like the following: kernel BUG at fs/fscache/operation.c:69! ... [exception RIP: fscache_enqueue_operation+246] ... #7 [ffff883fff083c10] fscache_enqueue_operation at ffffffffa0b793c6 #8 [ffff883fff083c28] cachefiles_read_waiter at ffffffffa0b15a48 #9 [ffff883fff083c48] __wake_up_common at ffffffff810af028 I'm not entirely certain as to which is line 69 in Lei's kernel, so I'm not entirely clear which assertion failed. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Reported-by: Lei Xue Reported-by: Vegard Nossum Reported-by: Anthony DeRobertis Reported-by: NeilBrown Reported-by: Daniel Axtens Reported-by: Kiran Kumar Modukuri Signed-off-by: David Howells Reviewed-by: Daniel Axtens Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/rdwr.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c0f3da3926a0..5b68cf526887 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, struct cachefiles_one_read *monitor = container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; + struct fscache_retrieval *op = monitor->op; struct wait_bit_key *key = _key; struct page *page = wait->private; @@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, list_del(&wait->task_list); /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(monitor->op); + ASSERT(op); - object = container_of(monitor->op->op.object, - struct cachefiles_object, fscache); + /* We need to temporarily bump the usage count as we don't own a ref + * here otherwise cachefiles_read_copier() may free the op between the + * monitor being enqueued on the op->to_do list and the op getting + * enqueued on the work queue. + */ + fscache_get_retrieval(op); + object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &monitor->op->to_do); + list_add_tail(&monitor->op_link, &op->to_do); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(monitor->op); + fscache_enqueue_retrieval(op); + fscache_put_retrieval(op); return 0; } From 9dbea3f6b80638031e4c65285f0b9a60504f1f43 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Thu, 21 Jun 2018 13:25:53 -0700 Subject: [PATCH 467/519] cachefiles: Wait rather than BUG'ing on "Unexpected object collision" [ Upstream commit c2412ac45a8f8f1cd582723c1a139608694d410d ] If we meet a conflicting object that is marked FSCACHE_OBJECT_IS_LIVE in the active object tree, we have been emitting a BUG after logging information about it and the new object. Instead, we should wait for the CACHEFILES_OBJECT_ACTIVE flag to be cleared on the old object (or return an error). The ACTIVE flag should be cleared after it has been removed from the active object tree. A timeout of 60s is used in the wait, so we shouldn't be able to get stuck there. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Signed-off-by: Kiran Kumar Modukuri Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/namei.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index c4b893453e0e..c43b4b08546b 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -194,7 +194,6 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache, pr_err("\n"); pr_err("Error: Unexpected object collision\n"); cachefiles_printk_object(object, xobject); - BUG(); } atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); From b142f71f17dbd1c203d51b975b98945e7dc310fd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 14 Jul 2018 01:28:44 +0900 Subject: [PATCH 468/519] selftests/ftrace: Add snapshot and tracing_on test case [ Upstream commit 82f4f3e69c5c29bce940dd87a2c0f16c51d48d17 ] Add a testcase for checking snapshot and tracing_on relationship. This ensures that the snapshotting doesn't affect current tracing on/off settings. Link: http://lkml.kernel.org/r/153149932412.11274.15289227592627901488.stgit@devbox Cc: Tom Zanussi Cc: Hiraku Toyooka Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../ftrace/test.d/00basic/snapshot.tc | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc new file mode 100644 index 000000000000..3b1f45e13a2e --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc @@ -0,0 +1,28 @@ +#!/bin/sh +# description: Snapshot and tracing setting +# flags: instance + +[ ! -f snapshot ] && exit_unsupported + +echo "Set tracing off" +echo 0 > tracing_on + +echo "Allocate and take a snapshot" +echo 1 > snapshot + +# Since trace buffer is empty, snapshot is also empty, but allocated +grep -q "Snapshot is allocated" snapshot + +echo "Ensure keep tracing off" +test `cat tracing_on` -eq 0 + +echo "Set tracing on" +echo 1 > tracing_on + +echo "Take a snapshot again" +echo 1 > snapshot + +echo "Ensure keep tracing on" +test `cat tracing_on` -eq 1 + +exit 0 From 0d35e0188a97a89cf92d16250e505d04707d56b3 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 26 Jul 2018 16:37:42 -0700 Subject: [PATCH 469/519] zswap: re-check zswap_is_full() after do zswap_shrink() [ Upstream commit 16e536ef47f567289a5699abee9ff7bb304bc12d ] /sys/../zswap/stored_pages keeps rising in a zswap test with "zswap.max_pool_percent=0" parameter. But it should not compress or store pages any more since there is no space in the compressed pool. Reproduce steps: 1. Boot kernel with "zswap.enabled=1" 2. Set the max_pool_percent to 0 # echo 0 > /sys/module/zswap/parameters/max_pool_percent 3. Do memory stress test to see if some pages have been compressed # stress --vm 1 --vm-bytes $mem_available"M" --timeout 60s 4. Watching the 'stored_pages' number increasing or not The root cause is: When zswap_max_pool_percent is set to 0 via kernel parameter, zswap_is_full() will always return true due to zswap_shrink(). But if the shinking is able to reclain a page successfully the code then proceeds to compressing/storing another page, so the value of stored_pages will keep changing. To solve the issue, this patch adds a zswap_is_full() check again after zswap_shrink() to make sure it's now under the max_pool_percent, and to not compress/store if we reached the limit. Link: http://lkml.kernel.org/r/20180530103936.17812-1-liwang@redhat.com Signed-off-by: Li Wang Acked-by: Dan Streetman Cc: Seth Jennings Cc: Huang Ying Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index 568015e2fe7a..87a8491909ee 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1018,6 +1018,15 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, ret = -ENOMEM; goto reject; } + + /* A second zswap_is_full() check after + * zswap_shrink() to make sure it's now + * under the max_pool_percent + */ + if (zswap_is_full()) { + ret = -ENOMEM; + goto reject; + } } /* allocate entry */ From 27629a00ad18a05669e19ec5f48b0c6d33dba61a Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 27 Jul 2018 07:50:53 -0400 Subject: [PATCH 470/519] tools/power turbostat: Read extended processor family from CPUID [ Upstream commit 5aa3d1a20a233d4a5f1ec3d62da3f19d9afea682 ] This fixes the reported family on modern AMD processors (e.g. Ryzen, which is family 0x17). Previously these processors all showed up as family 0xf. See the document https://support.amd.com/TechDocs/56255_OSRR.pdf section CPUID_Fn00000001_EAX for how to calculate the family from the BaseFamily and ExtFamily values. This matches the code in arch/x86/lib/cpu.c Signed-off-by: Calvin Walton Signed-off-by: Len Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/x86/turbostat/turbostat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 36e0d255d1b6..33c79e415075 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2691,7 +2691,9 @@ void process_cpuid() family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; - if (family == 6 || family == 0xf) + if (family == 0xf) + family += (fms >> 20) & 0xff; + if (family >= 6) model += ((fms >> 16) & 0xf) << 4; if (debug) From 7d1a2eef5e468fdeca2c443445ff792c2e58bffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 27 Jul 2018 13:13:39 +0200 Subject: [PATCH 471/519] Revert "MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d5ea019f8a381f88545bb26993b62ec24a2796b7 ] This reverts commit 2a027b47dba6 ("MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum"). Enabling ExternalSync caused a regression for BCM4718A1 (used e.g. in Netgear E3000 and ASUS RT-N16): it simply hangs during PCIe initialization. It's likely that BCM4717A1 is also affected. I didn't notice that earlier as the only BCM47XX devices with PCIe I own are: 1) BCM4706 with 2 x 14e4:4331 2) BCM4706 with 14e4:4360 and 14e4:4331 it appears that BCM4706 is unaffected. While BCM5300X-ES300-RDS.pdf seems to document that erratum and its workarounds (according to quotes provided by Tokunori) it seems not even Broadcom follows them. According to the provided info Broadcom should define CONF7_ES in their SDK's mipsinc.h and implement workaround in the si_mips_init(). Checking both didn't reveal such code. It *could* mean Broadcom also had some problems with the given workaround. Signed-off-by: Rafał Miłecki Signed-off-by: Paul Burton Reported-by: Michael Marley Patchwork: https://patchwork.linux-mips.org/patch/20032/ URL: https://bugs.openwrt.org/index.php?do=details&task_id=1688 Cc: Tokunori Ikegami Cc: Hauke Mehrtens Cc: Chris Packham Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/bcm47xx/setup.c | 6 ------ arch/mips/include/asm/mipsregs.h | 3 --- 2 files changed, 9 deletions(-) diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 4ca33175ec05..6d38948f0f1e 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -249,12 +249,6 @@ static int __init bcm47xx_cpu_fixes(void) */ if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706) cpu_wait = NULL; - - /* - * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail" - * Enable ExternalSync for sync instruction to take effect - */ - set_c0_config7(MIPS_CONF7_ES); break; #endif } diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 15c183ce9d4f..e43aca183c99 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -605,8 +605,6 @@ #define MIPS_CONF7_WII (_ULCAST_(1) << 31) #define MIPS_CONF7_RPS (_ULCAST_(1) << 2) -/* ExternalSync */ -#define MIPS_CONF7_ES (_ULCAST_(1) << 8) #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) @@ -2014,7 +2012,6 @@ __BUILD_SET_C0(status) __BUILD_SET_C0(cause) __BUILD_SET_C0(config) __BUILD_SET_C0(config5) -__BUILD_SET_C0(config7) __BUILD_SET_C0(intcontrol) __BUILD_SET_C0(intctl) __BUILD_SET_C0(srsmap) From cae83b4d45d522f3b2cb2c091ff46fdb79bd68bd Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Fri, 27 Jul 2018 11:19:29 -0700 Subject: [PATCH 472/519] enic: handle mtu change for vf properly [ Upstream commit ab123fe071c9aa9680ecd62eb080eb26cff4892c ] When driver gets notification for mtu change, driver does not handle it for all RQs. It handles only RQ[0]. Fix is to use enic_change_mtu() interface to change mtu for vf. Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 78 +++++++-------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 8390597aecb8..b20bce2c7da1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1842,10 +1842,32 @@ static int enic_stop(struct net_device *netdev) return 0; } +static int _enic_change_mtu(struct net_device *netdev, int new_mtu) +{ + bool running = netif_running(netdev); + int err = 0; + + ASSERT_RTNL(); + if (running) { + err = enic_stop(netdev); + if (err) + return err; + } + + netdev->mtu = new_mtu; + + if (running) { + err = enic_open(netdev); + if (err) + return err; + } + + return 0; +} + static int enic_change_mtu(struct net_device *netdev, int new_mtu) { struct enic *enic = netdev_priv(netdev); - int running = netif_running(netdev); if (new_mtu < ENIC_MIN_MTU || new_mtu > ENIC_MAX_MTU) return -EINVAL; @@ -1853,20 +1875,12 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (running) - enic_stop(netdev); - - netdev->mtu = new_mtu; - if (netdev->mtu > enic->port_mtu) netdev_warn(netdev, - "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + "interface MTU (%d) set higher than port MTU (%d)\n", + netdev->mtu, enic->port_mtu); - if (running) - enic_open(netdev); - - return 0; + return _enic_change_mtu(netdev, new_mtu); } static void enic_change_mtu_work(struct work_struct *work) @@ -1874,47 +1888,9 @@ static void enic_change_mtu_work(struct work_struct *work) struct enic *enic = container_of(work, struct enic, change_mtu_work); struct net_device *netdev = enic->netdev; int new_mtu = vnic_dev_mtu(enic->vdev); - int err; - unsigned int i; - - new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu)); rtnl_lock(); - - /* Stop RQ */ - del_timer_sync(&enic->notify_timer); - - for (i = 0; i < enic->rq_count; i++) - napi_disable(&enic->napi[i]); - - vnic_intr_mask(&enic->intr[0]); - enic_synchronize_irqs(enic); - err = vnic_rq_disable(&enic->rq[0]); - if (err) { - rtnl_unlock(); - netdev_err(netdev, "Unable to disable RQ.\n"); - return; - } - vnic_rq_clean(&enic->rq[0], enic_free_rq_buf); - vnic_cq_clean(&enic->cq[0]); - vnic_intr_clean(&enic->intr[0]); - - /* Fill RQ with new_mtu-sized buffers */ - netdev->mtu = new_mtu; - vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); - /* Need at least one buffer on ring to get going */ - if (vnic_rq_desc_used(&enic->rq[0]) == 0) { - rtnl_unlock(); - netdev_err(netdev, "Unable to alloc receive buffers.\n"); - return; - } - - /* Start RQ */ - vnic_rq_enable(&enic->rq[0]); - napi_enable(&enic->napi[0]); - vnic_intr_unmask(&enic->intr[0]); - enic_notify_timer_start(enic); - + (void)_enic_change_mtu(netdev, new_mtu); rtnl_unlock(); netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu); From ef33fed0535aad84dffdfb978ae732ec4469c485 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: [PATCH 473/519] arc: fix build errors in arc/include/asm/delay.h [ Upstream commit 2423665ec53f2a29191b35382075e9834288a975 ] Fix build errors in arch/arc/'s delay.h: - add "extern unsigned long loops_per_jiffy;" - add for "u64" In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:61:12: error: 'u64' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~ In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:63:37: error: 'loops_per_jiffy' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/delay.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index d5da2115d78a..03d6bb0f4e13 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -17,8 +17,11 @@ #ifndef __ASM_ARC_UDELAY_H #define __ASM_ARC_UDELAY_H +#include #include /* HZ */ +extern unsigned long loops_per_jiffy; + static inline void __delay(unsigned long loops) { __asm__ __volatile__( From 21d36c9737183a995faca026f887b0a8dcd04a44 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: [PATCH 474/519] arc: fix type warnings in arc/mm/cache.c [ Upstream commit ec837d620c750c0d4996a907c8c4f7febe1bbeee ] Fix type warnings in arch/arc/mm/cache.c. ../arch/arc/mm/cache.c: In function 'flush_anon_page': ../arch/arc/mm/cache.c:1062:55: warning: passing argument 2 of '__flush_dcache_page' makes integer from pointer without a cast [-Wint-conversion] __flush_dcache_page((phys_addr_t)page_address(page), page_address(page)); ^~~~~~~~~~~~~~~~~~ ../arch/arc/mm/cache.c:1013:59: note: expected 'long unsigned int' but argument is of type 'void *' void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) ~~~~~~~~~~~~~~^~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/mm/cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9a84cbdd44b0..017fb440bba4 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -821,7 +821,7 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, unsigned long pfn) { - unsigned int paddr = pfn << PAGE_SHIFT; + phys_addr_t paddr = pfn << PAGE_SHIFT; u_vaddr &= PAGE_MASK; @@ -841,8 +841,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long u_vaddr) { /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); } From 3837d650c8ca8e5974760859834c4160be671556 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Aug 2018 18:22:41 +0100 Subject: [PATCH 475/519] drivers: net: lmc: fix case value for target abort error [ Upstream commit afb41bb039656f0cecb54eeb8b2e2088201295f5 ] Current value for a target abort error is 0x010, however, this value should in fact be 0x002. As it stands, the range of error is 0..7 so it is currently never being detected. This bug has been in the driver since the early 2.6.12 days (or before). Detected by CoverityScan, CID#744290 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wan/lmc/lmc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 317bc79cc8b9..c178e1218347 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1385,7 +1385,7 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ case 0x001: printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name); break; - case 0x010: + case 0x002: printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name); break; default: From 621b2dd52dd92b77f61e3a15b634902ffbe54e1b Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 31 Jul 2018 15:46:02 +0200 Subject: [PATCH 476/519] scsi: fcoe: drop frames in ELS LOGO error path [ Upstream commit 63d0e3dffda311e77b9a8c500d59084e960a824a ] Drop the frames in the ELS LOGO error path instead of just returning an error. This fixes the following kmemleak report: unreferenced object 0xffff880064cb1000 (size 424): comm "kworker/0:2", pid 24, jiffies 4294904293 (age 68.504s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<(____ptrval____)>] _fc_frame_alloc+0x2c/0x180 [libfc] [<(____ptrval____)>] fc_lport_enter_logo+0x106/0x360 [libfc] [<(____ptrval____)>] fc_fabric_logoff+0x8c/0xc0 [libfc] [<(____ptrval____)>] fcoe_if_destroy+0x79/0x3b0 [fcoe] [<(____ptrval____)>] fcoe_destroy_work+0xd2/0x170 [fcoe] [<(____ptrval____)>] process_one_work+0x7ff/0x1420 [<(____ptrval____)>] worker_thread+0x87/0xef0 [<(____ptrval____)>] kthread+0x2db/0x390 [<(____ptrval____)>] ret_from_fork+0x35/0x40 [<(____ptrval____)>] 0xffffffffffffffff which can be triggered by issuing echo eth0 > /sys/bus/fcoe/ctlr_destroy Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fcoe/fcoe_ctlr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 34a1b1f333b4..d5184aa1ace4 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -752,9 +752,9 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, case ELS_LOGO: if (fip->mode == FIP_MODE_VN2VN) { if (fip->state != FIP_ST_VNMP_UP) - return -EINVAL; + goto drop; if (ntoh24(fh->fh_d_id) == FC_FID_FLOGI) - return -EINVAL; + goto drop; } else { if (fip->state != FIP_ST_ENABLED) return 0; From 8bbbd3a9a0f8ea4b22150639a05d03000b72fe12 Mon Sep 17 00:00:00 2001 From: Jim Gill Date: Thu, 2 Aug 2018 14:13:30 -0700 Subject: [PATCH 477/519] scsi: vmw_pvscsi: Return DID_RESET for status SAM_STAT_COMMAND_TERMINATED [ Upstream commit e95153b64d03c2b6e8d62e51bdcc33fcad6e0856 ] Commands that are reset are returned with status SAM_STAT_COMMAND_TERMINATED. PVSCSI currently returns DID_OK | SAM_STAT_COMMAND_TERMINATED which fails the command. Instead, set hostbyte to DID_RESET to allow upper layers to retry. Tested by copying a large file between two pvscsi disks on same adapter while performing a bus reset at 1-second intervals. Before fix, commands sometimes fail with DID_OK. After fix, commands observed to fail with DID_RESET. Signed-off-by: Jim Gill Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/vmw_pvscsi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 0f133c1817de..0de2f9069e23 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -545,9 +545,14 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, (btstat == BTSTAT_SUCCESS || btstat == BTSTAT_LINKED_COMMAND_COMPLETED || btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) { - cmd->result = (DID_OK << 16) | sdstat; - if (sdstat == SAM_STAT_CHECK_CONDITION && cmd->sense_buffer) - cmd->result |= (DRIVER_SENSE << 24); + if (sdstat == SAM_STAT_COMMAND_TERMINATED) { + cmd->result = (DID_RESET << 16); + } else { + cmd->result = (DID_OK << 16) | sdstat; + if (sdstat == SAM_STAT_CHECK_CONDITION && + cmd->sense_buffer) + cmd->result |= (DRIVER_SENSE << 24); + } } else switch (btstat) { case BTSTAT_SUCCESS: From fbee7b5b8c28ed02b6d6603eef27730c148a4481 Mon Sep 17 00:00:00 2001 From: "jie@chenjie6@huwei.com" Date: Fri, 10 Aug 2018 17:23:06 -0700 Subject: [PATCH 478/519] mm/memory.c: check return value of ioremap_prot [ Upstream commit 24eee1e4c47977bdfb71d6f15f6011e7b6188d04 ] ioremap_prot() can return NULL which could lead to an oops. Link: http://lkml.kernel.org/r/1533195441-58594-1-git-send-email-chenjie6@huawei.com Signed-off-by: chen jie Reviewed-by: Andrew Morton Cc: Li Zefan Cc: chenjie Cc: Yang Shi Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index d5bb1465d30c..42db644f5ec4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3701,6 +3701,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); + if (!maddr) + return -ENOMEM; + if (write) memcpy_toio(maddr + offset, buf, len); else From 58d141a9ad8668b267362b1d17296c9cfa053a01 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Jun 2018 18:46:40 -0500 Subject: [PATCH 479/519] cifs: add missing debug entries for kconfig options commit 950132afd59385caf6e2b84e5235d069fa10681d upstream. /proc/fs/cifs/DebugData displays the features (Kconfig options) used to build cifs.ko but it was missing some, and needed comma separator. These can be useful in debugging certain problems so we know which optional features were enabled in the user's build. Also clarify them, by making them more closely match the corresponding CONFIG_CIFS_* parm. Old format: Features: dfs fscache posix spnego xattr acl New format: Features: DFS,FSCACHE,SMB_DIRECT,STATS,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Reviewed-by: Paulo Alcantara CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_debug.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 0a3544fb50f9..bcbe42fb7e92 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -123,25 +123,41 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); seq_printf(m, "Features:"); #ifdef CONFIG_CIFS_DFS_UPCALL - seq_printf(m, " dfs"); + seq_printf(m, " DFS"); #endif #ifdef CONFIG_CIFS_FSCACHE - seq_printf(m, " fscache"); + seq_printf(m, ",FSCACHE"); +#endif +#ifdef CONFIG_CIFS_SMB_DIRECT + seq_printf(m, ",SMB_DIRECT"); +#endif +#ifdef CONFIG_CIFS_STATS2 + seq_printf(m, ",STATS2"); +#elif defined(CONFIG_CIFS_STATS) + seq_printf(m, ",STATS"); +#endif +#ifdef CONFIG_CIFS_DEBUG2 + seq_printf(m, ",DEBUG2"); +#elif defined(CONFIG_CIFS_DEBUG) + seq_printf(m, ",DEBUG"); +#endif +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + seq_printf(m, ",ALLOW_INSECURE_LEGACY"); #endif #ifdef CONFIG_CIFS_WEAK_PW_HASH - seq_printf(m, " lanman"); + seq_printf(m, ",WEAK_PW_HASH"); #endif #ifdef CONFIG_CIFS_POSIX - seq_printf(m, " posix"); + seq_printf(m, ",CIFS_POSIX"); #endif #ifdef CONFIG_CIFS_UPCALL - seq_printf(m, " spnego"); + seq_printf(m, ",UPCALL(SPNEGO)"); #endif #ifdef CONFIG_CIFS_XATTR - seq_printf(m, " xattr"); + seq_printf(m, ",XATTR"); #endif #ifdef CONFIG_CIFS_ACL - seq_printf(m, " acl"); + seq_printf(m, ",ACL"); #endif seq_putc(m, '\n'); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); From 0e28220672bd699b04cb2ac0c1a39f4307ee3402 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 23 Aug 2018 12:24:02 +0200 Subject: [PATCH 480/519] cifs: check kmalloc before use commit 126c97f4d0d1b5b956e8b0740c81a2b2a2ae548c upstream. The kmalloc was not being checked - if it fails issue a warning and return -ENOMEM to the caller. Signed-off-by: Nicholas Mc Guire Fixes: b8da344b74c8 ("cifs: dynamic allocation of ntlmssp blob") Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky cc: Stable ` Signed-off-by: Greg Kroah-Hartman --- fs/cifs/sess.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index a035d1a95882..9bc7a29f88d6 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -398,6 +398,12 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, goto setup_ntlmv2_ret; } *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL); + if (!*pbuffer) { + rc = -ENOMEM; + cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc); + *buflen = 0; + goto setup_ntlmv2_ret; + } sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer; memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); From ff3bb182bc12ee1f852fd7f80dd8d9ce040f2462 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 2 Aug 2018 20:28:18 -0500 Subject: [PATCH 481/519] smb3: Do not send SMB3 SET_INFO if nothing changed commit fd09b7d3b352105f08b8e02f7afecf7e816380ef upstream. An earlier commit had a typo which prevented the optimization from working: commit 18dd8e1a65dd ("Do not send SMB3 SET_INFO request if nothing is changing") Thank you to Metze for noticing this. Also clear a reserved field in the FILE_BASIC_INFO struct we send that should be zero (all the other fields in that struct were set or cleared explicitly already in cifs_set_file_info). Reviewed-by: Pavel Shilovsky CC: Stable # 4.9.x+ Reported-by: Stefan Metzmacher Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 2 ++ fs/cifs/smb2inode.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9cdeb0293267..36c8594bb147 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1063,6 +1063,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, if (!server->ops->set_file_info) return -ENOSYS; + info_buf.Pad = 0; + if (attrs->ia_valid & ATTR_ATIME) { set_time = true; info_buf.LastAccessTime = diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1238cd3552f9..0267d8cbc996 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -267,7 +267,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, int rc; if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) && - (buf->LastWriteTime == 0) && (buf->ChangeTime) && + (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) && (buf->Attributes == 0)) return 0; /* would be a no op, no sense sending this */ From 951461680e88afb347cfb1e53255cd1b50187e2e Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 27 Jul 2018 22:01:49 -0500 Subject: [PATCH 482/519] smb3: don't request leases in symlink creation and query commit 22783155f4bf956c346a81624ec9258930a6fe06 upstream. Fixes problem pointed out by Pavel in discussions about commit 729c0c9dd55204f0c9a823ac8a7bfa83d36c7e78 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable # 3.18.x+ Signed-off-by: Greg Kroah-Hartman --- fs/cifs/link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index e3548f73bdea..728289c32b32 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -419,7 +419,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_io_parms io_parms; int buf_type = CIFS_NO_BUFFER; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_II; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct smb2_file_all_info *pfile_info = NULL; oparms.tcon = tcon; @@ -481,7 +481,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_io_parms io_parms; int create_options = CREATE_NOT_DIR; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; if (backup_cred(cifs_sb)) From 685d7af55fcc6595aedce7bc91b1f5f25ff88301 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 19 Jul 2018 10:49:51 -0400 Subject: [PATCH 483/519] btrfs: don't leak ret from do_chunk_alloc commit 4559b0a71749c442d34f7cfb9e72c9e58db83948 upstream. If we're trying to make a data reservation and we have to allocate a data chunk we could leak ret == 1, as do_chunk_alloc() will return 1 if it allocated a chunk. Since the end of the function is the success path just return 0. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 982a9d509817..493c7354ec0b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4128,7 +4128,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) data_sinfo->flags, bytes, 1); spin_unlock(&data_sinfo->lock); - return ret; + return 0; } /* From f4d76c934b4dcb6f068d2c850c35ebac5faff021 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 16 Jul 2018 10:38:57 +0200 Subject: [PATCH 484/519] s390/kvm: fix deadlock when killed by oom commit 306d6c49ac9ded11114cb53b0925da52f2c2ada1 upstream. When the oom killer kills a userspace process in the page fault handler while in guest context, the fault handler fails to release the mm_sem if the FAULT_FLAG_RETRY_NOWAIT option is set. This leads to a deadlock when tearing down the mm when the process terminates. This bug can only happen when pfault is enabled, so only KVM clients are affected. The problem arises in the rare cases in which handle_mm_fault does not release the mm_sem. This patch fixes the issue by manually releasing the mm_sem when needed. Fixes: 24eb3a824c4f3 ("KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault") Cc: # 3.15+ Signed-off-by: Claudio Imbrenda Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ec1a30d0d11a..7218689bd6ee 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -459,6 +459,8 @@ static inline int do_exception(struct pt_regs *regs, int access) /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; + if (flags & FAULT_FLAG_RETRY_NOWAIT) + goto out_up; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) From ac92782e4ef6fc9f4694bdc9e1b5d5bfb72027ff Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Aug 2018 12:36:52 -0400 Subject: [PATCH 485/519] ext4: check for NUL characters in extended attribute's name commit 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 upstream. Extended attribute names are defined to be NUL-terminated, so the name must not contain a NUL character. This is important because there are places when remove extended attribute, the code uses strlen to determine the length of the entry. That should probably be fixed at some point, but code is currently really messy, so the simplest fix for now is to simply validate that the extended attributes are sane. https://bugzilla.kernel.org/show_bug.cgi?id=200401 Reported-by: Wen Xu Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c7cad05aed27..9fb2a751fce4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -197,6 +197,8 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) return -EFSCORRUPTED; + if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) + return -EFSCORRUPTED; e = next; } From bca4f76edcc74a0290fd007bf937217a8c43ee00 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:48:00 -0400 Subject: [PATCH 486/519] ext4: sysfs: print ext4_super_block fields as little-endian commit a4d2aadca184ece182418950d45ba4ffc7b652d2 upstream. While working on extended rand for last_error/first_error timestamps, I noticed that the endianess is wrong; we access the little-endian fields in struct ext4_super_block as native-endian when we print them. This adds a special case in ext4_attr_show() and ext4_attr_store() to byteswap the superblock fields if needed. In older kernels, this code was part of super.c, it got moved to sysfs.c in linux-4.4. Cc: stable@vger.kernel.org Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors") Reviewed-by: Andreas Dilger Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/sysfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index c2ee23acf359..ae9929d678d6 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -277,8 +277,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", - *((unsigned int *) ptr)); + if (a->attr_ptr == ptr_ext4_super_block_offset) + return snprintf(buf, PAGE_SIZE, "%u\n", + le32_to_cpup(ptr)); + else + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); case attr_pointer_atomic: if (!ptr) return 0; @@ -311,7 +315,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj, ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; - *((unsigned int *) ptr) = t; + if (a->attr_ptr == ptr_ext4_super_block_offset) + *((__le32 *) ptr) = cpu_to_le32(t); + else + *((unsigned int *) ptr) = t; return len; case attr_inode_readahead: return inode_readahead_blks_store(a, sbi, buf, len); From d684b70576fe7afdfb5d943f30aaaeac1f84a5a2 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 29 Jul 2018 17:13:42 -0400 Subject: [PATCH 487/519] ext4: reset error code in ext4_find_entry in fallback commit f39b3f45dbcb0343822cce31ea7636ad66e60bc2 upstream. When ext4_find_entry() falls back to "searching the old fashioned way" due to a corrupt dx dir, it needs to reset the error code to NULL so that the nonstandard ERR_BAD_DX_DIR code isn't returned to userspace. https://bugzilla.kernel.org/show_bug.cgi?id=199947 Reported-by: Anatoly Trosinenko Reviewed-by: Andreas Dilger Signed-off-by: Eric Sandeen Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 97472088d65a..a1f1e53d0e25 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1401,6 +1401,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto cleanup_and_exit; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); + ret = NULL; } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (!nblocks) { From 355cccb6593f87329940d411894bd9de30286b97 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 15 Aug 2018 12:51:21 -0700 Subject: [PATCH 488/519] arm64: mm: check for upper PAGE_SHIFT bits in pfn_valid() commit 5ad356eabc47d26a92140a0c4b20eba471c10de3 upstream. ARM64's pfn_valid() shifts away the upper PAGE_SHIFT bits of the input before seeing if the PFN is valid. This leads to false positives when some of the upper bits are set, but the lower bits match a valid PFN. For example, the following userspace code looks up a bogus entry in /proc/kpageflags: int pagemap = open("/proc/self/pagemap", O_RDONLY); int pageflags = open("/proc/kpageflags", O_RDONLY); uint64_t pfn, val; lseek64(pagemap, [...], SEEK_SET); read(pagemap, &pfn, sizeof(pfn)); if (pfn & (1UL << 63)) { /* valid PFN */ pfn &= ((1UL << 55) - 1); /* clear flag bits */ pfn |= (1UL << 55); lseek64(pageflags, pfn * sizeof(uint64_t), SEEK_SET); read(pageflags, &val, sizeof(val)); } On ARM64 this causes the userspace process to crash with SIGSEGV rather than reading (1 << KPF_NOPAGE). kpageflags_read() treats the offset as valid, and stable_page_flags() will try to access an address between the user and kernel address ranges. Fixes: c1cc1552616d ("arm64: MMU initialisation") Cc: stable@vger.kernel.org Signed-off-by: Greg Hackmann Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index efd89ce4533d..adf4122502a9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -120,7 +120,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + phys_addr_t addr = pfn << PAGE_SHIFT; + + if ((addr >> PAGE_SHIFT) != pfn) + return 0; + return memblock_is_memory(addr); } EXPORT_SYMBOL(pfn_valid); #endif From 24fa4a211e2655ba71f0da7e351475bb8f752e93 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 13 Aug 2018 11:43:51 +0100 Subject: [PATCH 489/519] KVM: arm/arm64: Skip updating PTE entry if no change commit 976d34e2dab10ece5ea8fe7090b7692913f89084 upstream. When there is contention on faulting in a particular page table entry at stage 2, the break-before-make requirement of the architecture can lead to additional refaulting due to TLB invalidation. Avoid this by skipping a page table update if the new value of the PTE matches the previous value. Cc: stable@vger.kernel.org Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup") Reviewed-by: Suzuki Poulose Acked-by: Christoffer Dall Signed-off-by: Punit Agrawal Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index e8835d4e173c..cd1779bf6ef7 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -961,6 +961,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, /* Create 2nd stage page table mapping - Level 3 */ old_pte = *pte; if (pte_present(old_pte)) { + /* Skip page table update if there is no change */ + if (pte_val(old_pte) == pte_val(*new_pte)) + return 0; + kvm_set_pte(pte, __pte(0)); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { From d839710da969f2f686802bcbbfb8164261b85833 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 13 Aug 2018 11:43:50 +0100 Subject: [PATCH 490/519] KVM: arm/arm64: Skip updating PMD entry if no change commit 86658b819cd0a9aa584cd84453ed268a6f013770 upstream. Contention on updating a PMD entry by a large number of vcpus can lead to duplicate work when handling stage 2 page faults. As the page table update follows the break-before-make requirement of the architecture, it can lead to repeated refaults due to clearing the entry and flushing the tlbs. This problem is more likely when - * there are large number of vcpus * the mapping is large block mapping such as when using PMD hugepages (512MB) with 64k pages. Fix this by skipping the page table update if there is no change in the entry being updated. Cc: stable@vger.kernel.org Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages") Reviewed-by: Suzuki Poulose Acked-by: Christoffer Dall Signed-off-by: Punit Agrawal Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index cd1779bf6ef7..e0267532bd4e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -892,19 +892,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache pmd = stage2_get_pmd(kvm, cache, addr); VM_BUG_ON(!pmd); - /* - * Mapping in huge pages should only happen through a fault. If a - * page is merged into a transparent huge page, the individual - * subpages of that huge page should be unmapped through MMU - * notifiers before we get here. - * - * Merging of CompoundPages is not supported; they should become - * splitting first, unmapped, merged, and mapped back in on-demand. - */ - VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); - old_pmd = *pmd; if (pmd_present(old_pmd)) { + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + + /* + * Mapping in huge pages should only happen through a + * fault. If a page is merged into a transparent huge + * page, the individual subpages of that huge page + * should be unmapped through MMU notifiers before we + * get here. + * + * Merging of CompoundPages is not supported; they + * should become splitting first, unmapped, merged, + * and mapped back in on-demand. + */ + VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { From 6a56bd7f2ea31d4c86849b8f67d4e2dc1cb5b788 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 20 Aug 2018 11:58:35 +0200 Subject: [PATCH 491/519] x86/speculation/l1tf: Fix overflow in l1tf_pfn_limit() on 32bit commit 9df9516940a61d29aedf4d91b483ca6597e7d480 upstream. On 32bit PAE kernels on 64bit hardware with enough physical bits, l1tf_pfn_limit() will overflow unsigned long. This in turn affects max_swapfile_size() and can lead to swapon returning -EINVAL. This has been observed in a 32bit guest with 42 bits physical address size, where max_swapfile_size() overflows exactly to 1 << 32, thus zero, and produces the following warning to dmesg: [ 6.396845] Truncating oversized swap area, only using 0k out of 2047996k Fix this by using unsigned long long instead. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Reported-by: Dominique Leuenberger Reported-by: Adrian Schroeter Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Acked-by: Andi Kleen Acked-by: Michal Hocko Cc: "H . Peter Anvin" Cc: Linus Torvalds Cc: Dave Hansen Cc: Michal Hocko Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180820095835.5298-1-vbabka@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/mm/init.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a3a53955f01c..b023f127eee3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -172,9 +172,9 @@ extern const struct seq_operations cpuinfo_op; extern void cpu_detect(struct cpuinfo_x86 *c); -static inline unsigned long l1tf_pfn_limit(void) +static inline unsigned long long l1tf_pfn_limit(void) { - return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; + return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; } extern void early_cpu_init(void); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4954a6cef50a..3bf0ff0d784a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -779,7 +779,7 @@ unsigned long max_swapfile_size(void) if (boot_cpu_has_bug(X86_BUG_L1TF)) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ - unsigned long l1tf_limit = l1tf_pfn_limit() + 1; + unsigned long long l1tf_limit = l1tf_pfn_limit() + 1; /* * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. @@ -787,7 +787,7 @@ unsigned long max_swapfile_size(void) #if CONFIG_PGTABLE_LEVELS > 2 l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; #endif - pages = min_t(unsigned long, l1tf_limit, pages); + pages = min_t(unsigned long long, l1tf_limit, pages); } return pages; } From 7b69cd6fa088e473869512672969e6c490cac1b6 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 23 Aug 2018 15:44:18 +0200 Subject: [PATCH 492/519] x86/speculation/l1tf: Fix off-by-one error when warning that system has too much RAM commit b0a182f875689647b014bc01d36b340217792852 upstream. Two users have reported [1] that they have an "extremely unlikely" system with more than MAX_PA/2 memory and L1TF mitigation is not effective. In fact it's a CPU with 36bits phys limit (64GB) and 32GB memory, but due to holes in the e820 map, the main region is almost 500MB over the 32GB limit: [ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000081effffff] usable Suggestions to use 'mem=32G' to enable the L1TF mitigation while losing the 500MB revealed, that there's an off-by-one error in the check in l1tf_select_mitigation(). l1tf_pfn_limit() returns the last usable pfn (inclusive) and the range check in the mitigation path does not take this into account. Instead of amending the range check, make l1tf_pfn_limit() return the first PFN which is over the limit which is less error prone. Adjust the other users accordingly. [1] https://bugzilla.suse.com/show_bug.cgi?id=1105536 Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Reported-by: George Anchev Reported-by: Christopher Snowhill Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Cc: "H . Peter Anvin" Cc: Linus Torvalds Cc: Andi Kleen Cc: Dave Hansen Cc: Michal Hocko Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180823134418.17008-1-vbabka@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 2 +- arch/x86/mm/init.c | 2 +- arch/x86/mm/mmap.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b023f127eee3..337c52192278 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -174,7 +174,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { - return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; + return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3bf0ff0d784a..f00eb52c16a6 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -779,7 +779,7 @@ unsigned long max_swapfile_size(void) if (boot_cpu_has_bug(X86_BUG_L1TF)) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ - unsigned long long l1tf_limit = l1tf_pfn_limit() + 1; + unsigned long long l1tf_limit = l1tf_pfn_limit(); /* * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 9a055ea279eb..528d71b50c3b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -138,7 +138,7 @@ bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) /* If it's real memory always allow */ if (pfn_valid(pfn)) return true; - if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) + if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) return false; return true; } From 72f6531162bd2f1b57e8114c8358fca507090f41 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 23 Aug 2018 16:21:29 +0200 Subject: [PATCH 493/519] x86/speculation/l1tf: Suggest what to do on systems with too much RAM commit 6a012288d6906fee1dbc244050ade1dafe4a9c8d upstream. Two users have reported [1] that they have an "extremely unlikely" system with more than MAX_PA/2 memory and L1TF mitigation is not effective. Make the warning more helpful by suggesting the proper mem=X kernel boot parameter to make it effective and a link to the L1TF document to help decide if the mitigation is worth the unusable RAM. [1] https://bugzilla.suse.com/show_bug.cgi?id=1105536 Suggested-by: Michal Hocko Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "H . Peter Anvin" Cc: Linus Torvalds Cc: Andi Kleen Cc: Dave Hansen Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/966571f0-9d7f-43dc-92c6-a10eec7a1254@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 34e4aaaf03d2..b9e6b60df148 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -654,6 +654,10 @@ static void __init l1tf_select_mitigation(void) half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", + half_pa); + pr_info("However, doing so will make a part of your RAM unusable.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); return; } From dde9849882e1f53b8d96284b4736f95a49ea3eff Mon Sep 17 00:00:00 2001 From: Rian Hunter Date: Sun, 19 Aug 2018 16:08:53 -0700 Subject: [PATCH 494/519] x86/process: Re-export start_thread() commit dc76803e57cc86589c4efcb5362918f9b0c0436f upstream. The consolidation of the start_thread() functions removed the export unintentionally. This breaks binfmt handlers built as a module. Add it back. Fixes: e634d8fc792c ("x86-64: merge the standard and compat start_thread() functions") Signed-off-by: Rian Hunter Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Vitaly Kuznetsov Cc: Joerg Roedel Cc: Dmitry Safonov Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180819230854.7275-1-rian@alum.mit.edu Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4cbb60fbff3e..c7cc81e9bb84 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -250,6 +250,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) start_thread_common(regs, new_ip, new_sp, __USER_CS, __USER_DS, 0); } +EXPORT_SYMBOL_GPL(start_thread); #ifdef CONFIG_COMPAT void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) From a8eaf0fc14c07029a74c7cf97e60fb07d3628226 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 Jul 2018 19:00:33 +0300 Subject: [PATCH 495/519] fuse: Don't access pipe->buffers without pipe_lock() commit a2477b0e67c52f4364a47c3ad70902bc2a61bd4c upstream. fuse_dev_splice_write() reads pipe->buffers to determine the size of 'bufs' array before taking the pipe_lock(). This is not safe as another thread might change the 'pipe->buffers' between the allocation and taking the pipe_lock(). So we end up with too small 'bufs' array. Move the bufs allocations inside pipe_lock()/pipe_unlock() to fix this. Fixes: dd3bb14f44a6 ("fuse: support splice() writing to fuse device") Signed-off-by: Andrey Ryabinin Cc: # v2.6.35 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d0cf1f010fbe..d6d8f8e8847a 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1991,11 +1991,14 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (!fud) return -EPERM; - bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); - if (!bufs) - return -ENOMEM; - pipe_lock(pipe); + + bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); + if (!bufs) { + pipe_unlock(pipe); + return -ENOMEM; + } + nbuf = 0; rem = 0; for (idx = 0; idx < pipe->nrbufs && rem < len; idx++) From d14dbb5c6c4c068a2117fdd3ae73ade8d490f483 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 496/519] fuse: fix double request_end() commit 87114373ea507895a62afb10d2910bd9adac35a8 upstream. Refcounting of request is broken when fuse_abort_conn() is called and request is on the fpq->io list: - ref is taken too late - then it is not dropped Fixes: 0d8e84b0432b ("fuse: simplify request abort") Cc: # v4.2 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d6d8f8e8847a..42a6263e7809 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -382,7 +382,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) struct fuse_iqueue *fiq = &fc->iq; if (test_and_set_bit(FR_FINISHED, &req->flags)) - return; + goto out_put_req; spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); @@ -412,6 +412,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); if (req->end) req->end(fc, req); +out_put_req: fuse_put_request(fc, req); } @@ -2154,6 +2155,7 @@ void fuse_abort_conn(struct fuse_conn *fc) set_bit(FR_ABORTED, &req->flags); if (!test_bit(FR_LOCKED, &req->flags)) { set_bit(FR_PRIVATE, &req->flags); + __fuse_get_request(req); list_move(&req->list, &to_end1); } spin_unlock(&req->waitq.lock); @@ -2180,7 +2182,6 @@ void fuse_abort_conn(struct fuse_conn *fc) while (!list_empty(&to_end1)) { req = list_first_entry(&to_end1, struct fuse_req, list); - __fuse_get_request(req); list_del_init(&req->list); request_end(fc, req); } From 4ded21c9ea1cf8687e33ebe0577f6b4353f5d8c6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 497/519] fuse: fix unlocked access to processing queue commit 45ff350bbd9d0f0977ff270a0d427c71520c0c37 upstream. fuse_dev_release() assumes that it's the only one referencing the fpq->processing list, but that's not true, since fuse_abort_conn() can be doing the same without any serialization between the two. Fixes: c3696046beb3 ("fuse: separate pqueue for clones") Cc: # v4.2 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 42a6263e7809..5fc7521e05a4 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2199,9 +2199,15 @@ int fuse_dev_release(struct inode *inode, struct file *file) if (fud) { struct fuse_conn *fc = fud->fc; struct fuse_pqueue *fpq = &fud->pq; + LIST_HEAD(to_end); + spin_lock(&fpq->lock); WARN_ON(!list_empty(&fpq->io)); - end_requests(fc, &fpq->processing); + list_splice_init(&fpq->processing, &to_end); + spin_unlock(&fpq->lock); + + end_requests(fc, &to_end); + /* Are we the last open device? */ if (atomic_dec_and_test(&fc->dev_count)) { WARN_ON(fc->iq.fasync != NULL); From 4d6ef17a060cb227007a7dec6aa886abc4c01297 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 498/519] fuse: umount should wait for all requests commit b8f95e5d13f5f0191dcb4b9113113d241636e7cb upstream. fuse_abort_conn() does not guarantee that all async requests have actually finished aborting (i.e. their ->end() function is called). This could actually result in still used inodes after umount. Add a helper to wait until all requests are fully done. This is done by looking at the "num_waiting" counter. When this counter drops to zero, we can be sure that no more requests are outstanding. Fixes: 0d8e84b0432b ("fuse: simplify request abort") Cc: # v4.2 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 23 +++++++++++++++++++---- fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 2 ++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5fc7521e05a4..2671e922c720 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -143,6 +143,16 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) return !fc->initialized || (for_background && fc->blocked); } +static void fuse_drop_waiting(struct fuse_conn *fc) +{ + if (fc->connected) { + atomic_dec(&fc->num_waiting); + } else if (atomic_dec_and_test(&fc->num_waiting)) { + /* wake up aborters */ + wake_up_all(&fc->blocked_waitq); + } +} + static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, bool for_background) { @@ -189,7 +199,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, return req; out: - atomic_dec(&fc->num_waiting); + fuse_drop_waiting(fc); return ERR_PTR(err); } @@ -296,7 +306,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) if (test_bit(FR_WAITING, &req->flags)) { __clear_bit(FR_WAITING, &req->flags); - atomic_dec(&fc->num_waiting); + fuse_drop_waiting(fc); } if (req->stolen_file) @@ -382,7 +392,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) struct fuse_iqueue *fiq = &fc->iq; if (test_and_set_bit(FR_FINISHED, &req->flags)) - goto out_put_req; + goto put_request; spin_lock(&fiq->waitq.lock); list_del_init(&req->intr_entry); @@ -412,7 +422,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); if (req->end) req->end(fc, req); -out_put_req: +put_request: fuse_put_request(fc, req); } @@ -2192,6 +2202,11 @@ void fuse_abort_conn(struct fuse_conn *fc) } EXPORT_SYMBOL_GPL(fuse_abort_conn); +void fuse_wait_aborted(struct fuse_conn *fc) +{ + wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); +} + int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_dev *fud = fuse_get_dev(file); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 604cd42dafef..7aafe9acc6c0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -842,6 +842,7 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); +void fuse_wait_aborted(struct fuse_conn *fc); /** * Invalidate inode attributes diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0b73e0c6d48..9517154ddd29 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -382,6 +382,8 @@ static void fuse_put_super(struct super_block *sb) fuse_send_destroy(fc); fuse_abort_conn(fc); + fuse_wait_aborted(fc); + mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); From c93ce9b00b9aa4bae06b758223ff62ca799522c6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Jul 2018 16:13:11 +0200 Subject: [PATCH 499/519] fuse: Fix oops at process_init_reply() commit e8f3bd773d22f488724dffb886a1618da85c2966 upstream. syzbot is hitting NULL pointer dereference at process_init_reply(). This is because deactivate_locked_super() is called before response for initial request is processed. Fix this by aborting and waiting for all requests (including FUSE_INIT) before resetting fc->sb. Original patch by Tetsuo Handa . Reported-by: syzbot Fixes: e27c9d3877a0 ("fuse: fuse: add time_gran to INIT_OUT") Cc: # v3.19 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/inode.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9517154ddd29..4b2eb65be0d4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -379,11 +379,6 @@ static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); - fuse_send_destroy(fc); - - fuse_abort_conn(fc); - fuse_wait_aborted(fc); - mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); @@ -1174,16 +1169,25 @@ static struct dentry *fuse_mount(struct file_system_type *fs_type, return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); } -static void fuse_kill_sb_anon(struct super_block *sb) +static void fuse_sb_destroy(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc) { + fuse_send_destroy(fc); + + fuse_abort_conn(fc); + fuse_wait_aborted(fc); + down_write(&fc->killsb); fc->sb = NULL; up_write(&fc->killsb); } +} +static void fuse_kill_sb_anon(struct super_block *sb) +{ + fuse_sb_destroy(sb); kill_anon_super(sb); } @@ -1206,14 +1210,7 @@ static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, static void fuse_kill_sb_blk(struct super_block *sb) { - struct fuse_conn *fc = get_fuse_conn_super(sb); - - if (fc) { - down_write(&fc->killsb); - fc->sb = NULL; - up_write(&fc->killsb); - } - + fuse_sb_destroy(sb); kill_block_super(sb); } From 84c7c9010234715c9cb9470b09f41ed12b0d2163 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 19 Jul 2018 15:49:39 +0300 Subject: [PATCH 500/519] fuse: Add missed unlock_page() to fuse_readpages_fill() commit 109728ccc5933151c68d1106e4065478a487a323 upstream. The above error path returns with page unlocked, so this place seems also to behave the same. Fixes: f8dbdf81821b ("fuse: rework fuse_readpages()") Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1a063cbfe503..8577f3ba6dc6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -879,6 +879,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) } if (WARN_ON(req->num_pages >= req->max_pages)) { + unlock_page(page); fuse_put_request(fc, req); return -EIO; } From f56d71483ea87fb6a4cdfb642585d66b90cc8742 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:55 +0200 Subject: [PATCH 501/519] udl-kms: change down_interruptible to down commit 8456b99c16d193c4c3b7df305cf431e027f0189c upstream. If we leave urbs around, it causes not only leak, but also memory corruption. This patch fixes the function udl_free_urb_list, so that it always waits for all urbs that are in progress. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 33dbfb2c4748..2d7d7e0b2a23 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -141,18 +141,13 @@ static void udl_free_urb_list(struct drm_device *dev) struct list_head *node; struct urb_node *unode; struct urb *urb; - int ret; unsigned long flags; DRM_DEBUG("Waiting for completes and freeing all render urbs\n"); /* keep waiting and freeing, until we've got 'em all */ while (count--) { - - /* Getting interrupted means a leak, but ok at shutdown*/ - ret = down_interruptible(&udl->urbs.limit_sem); - if (ret) - break; + down(&udl->urbs.limit_sem); spin_lock_irqsave(&udl->urbs.lock, flags); From 377d23bc9d8467cebe2b34902d9726c2683bb355 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:56 +0200 Subject: [PATCH 502/519] udl-kms: handle allocation failure commit 542bb9788a1f485eb1a2229178f665d8ea166156 upstream. Allocations larger than PAGE_ALLOC_COSTLY_ORDER are unreliable and they may fail anytime. This patch fixes the udl kms driver so that when a large alloactions fails, it tries to do multiple smaller allocations. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_main.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 2d7d7e0b2a23..30bfeb1b2512 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -171,17 +171,22 @@ static void udl_free_urb_list(struct drm_device *dev) static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) { struct udl_device *udl = dev->dev_private; - int i = 0; struct urb *urb; struct urb_node *unode; char *buf; + size_t wanted_size = count * size; spin_lock_init(&udl->urbs.lock); +retry: udl->urbs.size = size; INIT_LIST_HEAD(&udl->urbs.list); - while (i < count) { + sema_init(&udl->urbs.limit_sem, 0); + udl->urbs.count = 0; + udl->urbs.available = 0; + + while (udl->urbs.count * size < wanted_size) { unode = kzalloc(sizeof(struct urb_node), GFP_KERNEL); if (!unode) break; @@ -197,11 +202,16 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) } unode->urb = urb; - buf = usb_alloc_coherent(udl->udev, MAX_TRANSFER, GFP_KERNEL, + buf = usb_alloc_coherent(udl->udev, size, GFP_KERNEL, &urb->transfer_dma); if (!buf) { kfree(unode); usb_free_urb(urb); + if (size > PAGE_SIZE) { + size /= 2; + udl_free_urb_list(dev); + goto retry; + } break; } @@ -212,16 +222,14 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) list_add_tail(&unode->entry, &udl->urbs.list); - i++; + up(&udl->urbs.limit_sem); + udl->urbs.count++; + udl->urbs.available++; } - sema_init(&udl->urbs.limit_sem, i); - udl->urbs.count = i; - udl->urbs.available = i; + DRM_DEBUG("allocated %d %d byte urbs\n", udl->urbs.count, (int) size); - DRM_DEBUG("allocated %d %d byte urbs\n", i, (int) size); - - return i; + return udl->urbs.count; } struct urb *udl_get_urb(struct drm_device *dev) From 7030b7046e187ce85e59cc61e10d6c18ecce7104 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:57 +0200 Subject: [PATCH 503/519] udl-kms: fix crash due to uninitialized memory commit 09a00abe3a9941c2715ca83eb88172cd2f54d8fd upstream. We must use kzalloc when allocating the fb_deferred_io structure. Otherwise, the field first_io is undefined and it causes a crash. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 29bd801f5dad..0c648efd9a58 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -341,7 +341,7 @@ static int udl_fb_open(struct fb_info *info, int user) struct fb_deferred_io *fbdefio; - fbdefio = kmalloc(sizeof(struct fb_deferred_io), GFP_KERNEL); + fbdefio = kzalloc(sizeof(struct fb_deferred_io), GFP_KERNEL); if (fbdefio) { fbdefio->delay = DL_DEFIO_WRITE_DELAY; From b81b8594d6a2ed81025df895139bae5bd7231295 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 27 Jun 2018 17:36:38 +0200 Subject: [PATCH 504/519] ASoC: dpcm: don't merge format from invalid codec dai commit 4febced15ac8ddb9cf3e603edb111842e4863d9a upstream. When merging codec formats, dpcm_runtime_base_format() should skip the codecs which are not supporting the current stream direction. At the moment, if a BE link has more than one codec, and only one of these codecs has no capture DAI, it becomes impossible to start a capture stream because the merged format would be 0. Skipping invalid codec DAI solves the problem. Fixes: b073ed4e2126 ("ASoC: soc-pcm: DPCM cares BE format") Signed-off-by: Jerome Brunet Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 43b80db952d1..f99eb8f44282 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1570,6 +1570,14 @@ static u64 dpcm_runtime_base_format(struct snd_pcm_substream *substream) int i; for (i = 0; i < be->num_codecs; i++) { + /* + * Skip CODECs which don't support the current stream + * type. See soc_pcm_init_runtime_hw() for more details + */ + if (!snd_soc_dai_stream_valid(be->codec_dais[i], + stream)) + continue; + codec_dai_drv = be->codec_dais[i]->driver; if (stream == SNDRV_PCM_STREAM_PLAYBACK) codec_stream = &codec_dai_drv->playback; From 3e053c356194988b1e190c5db71f3fc97bb24e9f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 26 Jul 2018 15:49:10 -0500 Subject: [PATCH 505/519] ASoC: sirf: Fix potential NULL pointer dereference commit ae1c696a480c67c45fb23b35162183f72c6be0e1 upstream. There is a potential execution path in which function platform_get_resource() returns NULL. If this happens, we will end up having a NULL pointer dereference. Fix this by replacing devm_ioremap with devm_ioremap_resource, which has the NULL check and the memory region request. This code was detected with the help of Coccinelle. Cc: stable@vger.kernel.org Fixes: 2bd8d1d5cf89 ("ASoC: sirf: Add audio usp interface driver") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sirf/sirf-usp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/sirf/sirf-usp.c b/sound/soc/sirf/sirf-usp.c index 45fc06c0e0e5..6b504f407079 100644 --- a/sound/soc/sirf/sirf-usp.c +++ b/sound/soc/sirf/sirf-usp.c @@ -367,10 +367,9 @@ static int sirf_usp_pcm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, usp); mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap(&pdev->dev, mem_res->start, - resource_size(mem_res)); - if (base == NULL) - return -ENOMEM; + base = devm_ioremap_resource(&pdev->dev, mem_res); + if (IS_ERR(base)) + return PTR_ERR(base); usp->regmap = devm_regmap_init_mmio(&pdev->dev, base, &sirf_usp_regmap_config); if (IS_ERR(usp->regmap)) From 738cfe4d2de29c5f1da59a057153bf016db89243 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Jul 2018 17:55:15 +0300 Subject: [PATCH 506/519] pinctrl: freescale: off by one in imx1_pinconf_group_dbg_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 19da44cd33a3a6ff7c97fff0189999ff15b241e4 upstream. The info->groups[] array is allocated in imx1_pinctrl_parse_dt(). It has info->ngroups elements. Thus the > here should be >= to prevent reading one element beyond the end of the array. Cc: stable@vger.kernel.org Fixes: 30612cd90005 ("pinctrl: imx1 core driver") Signed-off-by: Dan Carpenter Reviewed-by: Uwe Kleine-König Acked-by: Dong Aisheng Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-imx1-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c index acaf84cadca3..6c9420ee9e03 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c +++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c @@ -434,7 +434,7 @@ static void imx1_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, const char *name; int i, ret; - if (group > info->ngroups) + if (group >= info->ngroups) return; seq_puts(s, "\n"); From ed9c4055689cab0908fed0214dc0844babb83a6f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 27 Aug 2018 14:40:09 -0700 Subject: [PATCH 507/519] x86/irqflags: Mark native_restore_fl extern inline commit 1f59a4581b5ecfe9b4f049a7a2cf904d8352842d upstream. This should have been marked extern inline in order to pick up the out of line definition in arch/x86/kernel/irqflags.S. Fixes: 208cbb325589 ("x86/irqflags: Provide a declaration for native_save_fl") Reported-by: Ben Hutchings Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: "H. Peter Anvin" Cc: Boris Ostrovsky Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180827214011.55428-1-ndesaulniers@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index cb7f04981c6b..8afbdcd3032b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -28,7 +28,8 @@ extern inline unsigned long native_save_fl(void) return flags; } -static inline void native_restore_fl(unsigned long flags) +extern inline void native_restore_fl(unsigned long flags); +extern inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" : /* no output */ From 2edb10cbf21fca9b220a2bdf0b87b7bbeaf1e1e9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 24 Aug 2018 10:03:51 -0700 Subject: [PATCH 508/519] x86/spectre: Add missing family 6 check to microcode check commit 1ab534e85c93945f7862378d8c8adcf408205b19 upstream. The check for Spectre microcodes does not check for family 6, only the model numbers. Add a family 6 check to avoid ambiguity with other families. Fixes: a5b296636453 ("x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180824170351.34874-2-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4dce22d3cb06..b18fe3d245fe 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -74,6 +74,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; + if (c->x86 != 6) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_mask == spectre_bad_microcodes[i].stepping) From 2502064731b133b87540758678b4485c0dbdc925 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Aug 2018 14:26:39 +0200 Subject: [PATCH 509/519] s390: fix br_r1_trampoline for machines without exrl commit 26f843848bae973817b3587780ce6b7b0200d3e4 upstream. For machines without the exrl instruction the BFP jit generates code that uses an "br %r1" instruction located in the lowcore page. Unfortunately there is a cut & paste error that puts an additional "larl %r1,.+14" instruction in the code that clobbers the branch target address in %r1. Remove the larl instruction. Cc: # v4.17+ Fixes: de5cb6eb51 ("s390: use expoline thunks in the BPF JIT") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a26528afceb2..727693e283da 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -522,8 +522,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* br %r1 */ _EMIT2(0x07f1); } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); /* ex 0,S390_lowcore.br_r1_tampoline */ EMIT4_DISP(0x44000000, REG_0, REG_0, offsetof(struct _lowcore, br_r1_trampoline)); From b629d4650a19d883a594a023603894ba9870ab91 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 16 May 2018 09:37:25 +0200 Subject: [PATCH 510/519] s390/qdio: reset old sbal_state flags commit 64e03ff72623b8c2ea89ca3cb660094e019ed4ae upstream. When allocating a new AOB fails, handle_outbound() is still capable of transmitting the selected buffer (just without async completion). But if a previous transfer on this queue slot used async completion, its sbal_state flags field is still set to QDIO_OUTBUF_STATE_FLAG_PENDING. So when the upper layer driver sees this stale flag, it expects an async completion that never happens. Fix this by unconditionally clearing the flags field. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Cc: #v3.2+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/qdio.h | 1 - drivers/s390/cio/qdio_main.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 998b61cd0e56..4b39ba700d32 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -261,7 +261,6 @@ struct qdio_outbuf_state { void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 #define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 #define CHSC_AC1_INITIATE_INPUTQ 0x80 diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 742ca57ece8c..d64b401f3d05 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -640,21 +640,20 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, unsigned long phys_aob = 0; if (!q->use_cq) - goto out; + return 0; if (!q->aobs[bufnr]) { struct qaob *aob = qdio_allocate_aob(); q->aobs[bufnr] = aob; } if (q->aobs[bufnr]) { - q->sbal_state[bufnr].flags = QDIO_OUTBUF_STATE_FLAG_NONE; q->sbal_state[bufnr].aob = q->aobs[bufnr]; q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user; phys_aob = virt_to_phys(q->aobs[bufnr]); WARN_ON_ONCE(phys_aob & 0xFF); } -out: + q->sbal_state[bufnr].flags = 0; return phys_aob; } From 5169397d50ed4ca70d273022cc7d8d8f7f68966e Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 13 Aug 2018 11:26:46 +0200 Subject: [PATCH 511/519] s390/pci: fix out of bounds access during irq setup commit 866f3576a72b2233a76dffb80290f8086dc49e17 upstream. During interrupt setup we allocate interrupt vectors, walk the list of msi descriptors, and fill in the message data. Requesting more interrupts than supported on s390 can lead to an out of bounds access. When we restrict the number of interrupts we should also stop walking the msi list after all supported interrupts are handled. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index ef0499b76c50..9a5754d4ee87 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -412,6 +412,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) hwirq = 0; for_each_pci_msi_entry(msi, pdev) { rc = -EIO; + if (hwirq >= msi_vecs) + break; irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ if (irq < 0) goto out_msi; From 3051451d49dbe177b85f1f1b8fd57ecd7dc87f21 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:35:01 +0900 Subject: [PATCH 512/519] kprobes: Make list and blacklist root user read only commit f2a3ab36077222437b4826fc76111caa14562b7c upstream. Since the blacklist and list files on debugfs indicates a sensitive address information to reader, it should be restricted to the root user. Suggested-by: Thomas Richter Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tobin C . Harding Cc: Will Deacon Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491890171.9916.5183693615601334087.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index bbe9dd0886bd..388bcace62f8 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2441,7 +2441,7 @@ static int __init debugfs_kprobe_init(void) if (!dir) return -ENOMEM; - file = debugfs_create_file("list", 0444, dir, NULL, + file = debugfs_create_file("list", 0400, dir, NULL, &debugfs_kprobes_operations); if (!file) goto error; @@ -2451,7 +2451,7 @@ static int __init debugfs_kprobe_init(void) if (!file) goto error; - file = debugfs_create_file("blacklist", 0444, dir, NULL, + file = debugfs_create_file("blacklist", 0400, dir, NULL, &debugfs_kprobe_blacklist_ops); if (!file) goto error; From 79ffdc486e1eb4316d34394dbc611e693a35401a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 15 May 2018 23:33:26 +0100 Subject: [PATCH 513/519] MIPS: Correct the 64-bit DSP accumulator register size commit f5958b4cf4fc38ed4583ab83fb7c4cd1ab05f47b upstream. Use the `unsigned long' rather than `__u32' type for DSP accumulator registers, like with the regular MIPS multiply/divide accumulator and general-purpose registers, as all are 64-bit in 64-bit implementations and using a 32-bit data type leads to contents truncation on context saving. Update `arch_ptrace' and `compat_arch_ptrace' accordingly, removing casts that are similarly not used with multiply/divide accumulator or general-purpose register accesses. Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Fixes: e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") Patchwork: https://patchwork.linux-mips.org/patch/19329/ Cc: Alexander Viro Cc: James Hogan Cc: Ralf Baechle Cc: linux-fsdevel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 2.6.15+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/processor.h | 2 +- arch/mips/kernel/ptrace.c | 2 +- arch/mips/kernel/ptrace32.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 041153f5cf93..41a8201d572e 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -131,7 +131,7 @@ struct mips_fpu_struct { #define NUM_DSP_REGS 6 -typedef __u32 dspreg_t; +typedef unsigned long dspreg_t; struct mips_dsp_state { dspreg_t dspr[NUM_DSP_REGS]; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 32fa3ae1a0a6..5a869515b393 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -879,7 +879,7 @@ long arch_ptrace(struct task_struct *child, long request, goto out; } dregs = __get_dsp_regs(child); - tmp = (unsigned long) (dregs[addr - DSP_BASE]); + tmp = dregs[addr - DSP_BASE]; break; } case DSP_CONTROL: diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index d95117e71f69..286ec2d24d47 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -140,7 +140,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, goto out; } dregs = __get_dsp_regs(child); - tmp = (unsigned long) (dregs[addr - DSP_BASE]); + tmp = dregs[addr - DSP_BASE]; break; } case DSP_CONTROL: From bb190acea829443e0be1cd6dc87837df03b39083 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 21 Aug 2018 12:12:59 -0700 Subject: [PATCH 514/519] MIPS: lib: Provide MIPS64r6 __multi3() for GCC < 7 commit 690d9163bf4b8563a2682e619f938e6a0443947f upstream. Some versions of GCC suboptimally generate calls to the __multi3() intrinsic for MIPS64r6 builds, resulting in link failures due to the missing function: LD vmlinux.o MODPOST vmlinux.o kernel/bpf/verifier.o: In function `kmalloc_array': include/linux/slab.h:631: undefined reference to `__multi3' fs/select.o: In function `kmalloc_array': include/linux/slab.h:631: undefined reference to `__multi3' ... We already have a workaround for this in which we provide the instrinsic, but we do so selectively for GCC 7 only. Unfortunately the issue occurs with older GCC versions too - it has been observed with both GCC 5.4.0 & GCC 6.4.0. MIPSr6 support was introduced in GCC 5, so all major GCC versions prior to GCC 8 are affected and we extend our workaround accordingly to all MIPS64r6 builds using GCC versions older than GCC 8. Signed-off-by: Paul Burton Reported-by: Vladimir Kondratiev Fixes: ebabcf17bcd7 ("MIPS: Implement __multi3 for GCC7 MIPS64r6 builds") Patchwork: https://patchwork.linux-mips.org/patch/20297/ Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.15+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/lib/multi3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c index 111ad475aa0c..4c2483f410c2 100644 --- a/arch/mips/lib/multi3.c +++ b/arch/mips/lib/multi3.c @@ -4,12 +4,12 @@ #include "libgcc.h" /* - * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that - * specific case only we'll implement it here. + * GCC 7 & older can suboptimally generate __multi3 calls for mips64r6, so for + * that specific case only we implement that intrinsic here. * * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 */ -#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 8) /* multiply 64-bit values, low 64-bits returned */ static inline long long notrace dmulu(long long a, long long b) From 461a6385e58e8247e6ba2005aa5d1b8d980ee4a2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:51:40 -0700 Subject: [PATCH 515/519] scsi: sysfs: Introduce sysfs_{un,}break_active_protection() commit 2afc9166f79b8f6da5f347f48515215ceee4ae37 upstream. Introduce these two functions and export them such that the next patch can add calls to these functions from the SCSI core. Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 44 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 14 ++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 39c75a86c67f..666986b95c5d 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -407,6 +407,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, } EXPORT_SYMBOL_GPL(sysfs_chmod_file); +/** + * sysfs_break_active_protection - break "active" protection + * @kobj: The kernel object @attr is associated with. + * @attr: The attribute to break the "active" protection for. + * + * With sysfs, just like kernfs, deletion of an attribute is postponed until + * all active .show() and .store() callbacks have finished unless this function + * is called. Hence this function is useful in methods that implement self + * deletion. + */ +struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr) +{ + struct kernfs_node *kn; + + kobject_get(kobj); + kn = kernfs_find_and_get(kobj->sd, attr->name); + if (kn) + kernfs_break_active_protection(kn); + return kn; +} +EXPORT_SYMBOL_GPL(sysfs_break_active_protection); + +/** + * sysfs_unbreak_active_protection - restore "active" protection + * @kn: Pointer returned by sysfs_break_active_protection(). + * + * Undo the effects of sysfs_break_active_protection(). Since this function + * calls kernfs_put() on the kernfs node that corresponds to the 'attr' + * argument passed to sysfs_break_active_protection() that attribute may have + * been removed between the sysfs_break_active_protection() and + * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after + * this function has returned. + */ +void sysfs_unbreak_active_protection(struct kernfs_node *kn) +{ + struct kobject *kobj = kn->parent->priv; + + kernfs_unbreak_active_protection(kn); + kernfs_put(kn); + kobject_put(kobj); +} +EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); + /** * sysfs_remove_file_ns - remove an object attribute with a custom ns tag * @kobj: object we're acting for diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 00a1f330f93a..d3c19f8c4564 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -238,6 +238,9 @@ int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); +struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr); +void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); @@ -351,6 +354,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj, return 0; } +static inline struct kernfs_node * +sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr) +{ + return NULL; +} + +static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) +{ +} + static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) From a49097fb388388cc4b2bf8d3f42693eb594b2484 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 2 Aug 2018 10:51:41 -0700 Subject: [PATCH 516/519] scsi: core: Avoid that SCSI device removal through sysfs triggers a deadlock commit 0ee223b2e1f67cb2de9c0e3247c510d846e74d63 upstream. A long time ago the unfortunate decision was taken to add a self-deletion attribute to the sysfs SCSI device directory. That decision was unfortunate because self-deletion is really tricky. We can't drop that attribute because widely used user space software depends on it, namely the rescan-scsi-bus.sh script. Hence this patch that avoids that writing into that attribute triggers a deadlock. See also commit 7973cbd9fbd9 ("[PATCH] add sysfs attributes to scan and delete scsi_devices"). This patch avoids that self-removal triggers the following deadlock: ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc2-dbg+ #5 Not tainted ------------------------------------------------------ modprobe/6539 is trying to acquire lock: 000000008323c4cd (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x45/0x90 but task is already holding lock: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&shost->scan_mutex){+.+.}: __mutex_lock+0xfe/0xc70 mutex_lock_nested+0x1b/0x20 scsi_remove_device+0x26/0x40 [scsi_mod] sdev_store_delete+0x27/0x30 [scsi_mod] dev_attr_store+0x3e/0x50 sysfs_kf_write+0x87/0xa0 kernfs_fop_write+0x190/0x230 __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (kn->count#202){++++}: lock_acquire+0xd2/0x260 __kernfs_remove+0x424/0x4a0 kernfs_remove_by_name_ns+0x45/0x90 remove_files.isra.1+0x3a/0x90 sysfs_remove_group+0x5c/0xc0 sysfs_remove_groups+0x39/0x60 device_remove_attrs+0x82/0xb0 device_del+0x251/0x580 __scsi_remove_device+0x19f/0x1d0 [scsi_mod] scsi_forget_host+0x37/0xb0 [scsi_mod] scsi_remove_host+0x9b/0x150 [scsi_mod] sdebug_driver_remove+0x4b/0x150 [scsi_debug] device_release_driver_internal+0x241/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1bc/0x290 device_del+0x259/0x580 device_unregister+0x1a/0x70 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug] scsi_debug_exit+0x76/0xe8 [scsi_debug] __x64_sys_delete_module+0x1c1/0x280 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&shost->scan_mutex); lock(kn->count#202); lock(&shost->scan_mutex); lock(kn->count#202); *** DEADLOCK *** 2 locks held by modprobe/6539: #0: 00000000efaf9298 (&dev->mutex){....}, at: device_release_driver_internal+0x68/0x360 #1: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod] stack backtrace: CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_circular_bug.isra.34+0x213/0x221 __lock_acquire+0x1a7e/0x1b50 lock_acquire+0xd2/0x260 __kernfs_remove+0x424/0x4a0 kernfs_remove_by_name_ns+0x45/0x90 remove_files.isra.1+0x3a/0x90 sysfs_remove_group+0x5c/0xc0 sysfs_remove_groups+0x39/0x60 device_remove_attrs+0x82/0xb0 device_del+0x251/0x580 __scsi_remove_device+0x19f/0x1d0 [scsi_mod] scsi_forget_host+0x37/0xb0 [scsi_mod] scsi_remove_host+0x9b/0x150 [scsi_mod] sdebug_driver_remove+0x4b/0x150 [scsi_debug] device_release_driver_internal+0x241/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1bc/0x290 device_del+0x259/0x580 device_unregister+0x1a/0x70 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug] scsi_debug_exit+0x76/0xe8 [scsi_debug] __x64_sys_delete_module+0x1c1/0x280 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html. Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of device_schedule_callback()") Signed-off-by: Bart Van Assche Cc: Greg Kroah-Hartman Acked-by: Tejun Heo Cc: Johannes Thumshirn Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 4477e999ec70..8db0c48943d6 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -678,8 +678,24 @@ static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - if (device_remove_file_self(dev, attr)) - scsi_remove_device(to_scsi_device(dev)); + struct kernfs_node *kn; + + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + WARN_ON_ONCE(!kn); + /* + * Concurrent writes into the "delete" sysfs attribute may trigger + * concurrent calls to device_remove_file() and scsi_remove_device(). + * device_remove_file() handles concurrent removal calls by + * serializing these and by ignoring the second and later removal + * attempts. Concurrent calls of scsi_remove_device() are + * serialized. The second and later calls of scsi_remove_device() are + * ignored because the first call of that function changes the device + * state into SDEV_DEL. + */ + device_remove_file(dev, attr); + scsi_remove_device(to_scsi_device(dev)); + if (kn) + sysfs_unbreak_active_protection(kn); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); From aba1ba97e64f2e6971df812fc45985222356cdd5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 26 Jul 2018 12:13:49 -0500 Subject: [PATCH 517/519] iscsi target: fix session creation failure handling commit 26abc916a898d34c5ad159315a2f683def3c5555 upstream. The problem is that iscsi_login_zero_tsih_s1 sets conn->sess early in iscsi_login_set_conn_values. If the function fails later like when we alloc the idr it does kfree(sess) and leaves the conn->sess pointer set. iscsi_login_zero_tsih_s1 then returns -Exyz and we then call iscsi_target_login_sess_out and access the freed memory. This patch has iscsi_login_zero_tsih_s1 either completely setup the session or completely tear it down, so later in iscsi_target_login_sess_out we can just check for it being set to the connection. Cc: stable@vger.kernel.org Fixes: 0957627a9960 ("iscsi-target: Fix sess allocation leak in...") Signed-off-by: Mike Christie Acked-by: Martin K. Petersen Signed-off-by: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_login.c | 35 ++++++++++++++--------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bc2cbffec27e..63e54beed196 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -323,8 +323,7 @@ static int iscsi_login_zero_tsih_s1( pr_err("idr_alloc() for sess_idr failed\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - kfree(sess); - return -ENOMEM; + goto free_sess; } sess->creation_time = get_jiffies_64(); @@ -340,20 +339,28 @@ static int iscsi_login_zero_tsih_s1( ISCSI_LOGIN_STATUS_NO_RESOURCES); pr_err("Unable to allocate memory for" " struct iscsi_sess_ops.\n"); - kfree(sess); - return -ENOMEM; + goto remove_idr; } sess->se_sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(sess->se_sess)) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - kfree(sess->sess_ops); - kfree(sess); - return -ENOMEM; + goto free_ops; } return 0; + +free_ops: + kfree(sess->sess_ops); +remove_idr: + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, sess->session_index); + spin_unlock_bh(&sess_idr_lock); +free_sess: + kfree(sess); + conn->sess = NULL; + return -ENOMEM; } static int iscsi_login_zero_tsih_s2( @@ -1142,13 +1149,13 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn, ISCSI_LOGIN_STATUS_INIT_ERR); if (!zero_tsih || !conn->sess) goto old_sess_out; - if (conn->sess->se_sess) - transport_free_session(conn->sess->se_sess); - if (conn->sess->session_index != 0) { - spin_lock_bh(&sess_idr_lock); - idr_remove(&sess_idr, conn->sess->session_index); - spin_unlock_bh(&sess_idr_lock); - } + + transport_free_session(conn->sess->se_sess); + + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + kfree(conn->sess->sess_ops); kfree(conn->sess); conn->sess = NULL; From eb08a285899df82056f712421988c6ccd0b58ba6 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 26 Apr 2018 11:51:08 -0600 Subject: [PATCH 518/519] cdrom: Fix info leak/OOB read in cdrom_ioctl_drive_status commit 8f3fafc9c2f0ece10832c25f7ffcb07c97a32ad4 upstream. Like d88b6d04: "cdrom: information leak in cdrom_ioctl_media_changed()" There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). Signed-off-by: Scott Bauer Signed-off-by: Scott Bauer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 0151039bff05..1012b2cb6a16 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2526,7 +2526,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || (arg == CDSL_CURRENT || arg == CDSL_NONE)) return cdi->ops->drive_status(cdi, CDSL_CURRENT); - if (((int)arg >= cdi->capacity)) + if (arg >= cdi->capacity) return -EINVAL; return cdrom_slot_status(cdi, arg); } From 4d9339a1f0e6e288903d34020f2378d250ea3cac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Sep 2018 09:18:41 +0200 Subject: [PATCH 519/519] Linux 4.4.154 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 208a813be615..b184286cf7e6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 153 +SUBLEVEL = 154 EXTRAVERSION = NAME = Blurry Fish Butt