From 49e065f5049f2b8d69f29ddc641498f46108dd12 Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Thu, 21 Dec 2017 21:10:36 -0500
Subject: [PATCH 001/519] KVM/Eventfd: Avoid crash when assign and deassign
 specific eventfd in parallel.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b5020a8e6b54d2ece80b1e7dedb33c79a40ebd47 upstream.

Syzbot reports crashes in kvm_irqfd_assign(), caused by use-after-free
when kvm_irqfd_assign() and kvm_irqfd_deassign() run in parallel
for one specific eventfd. When the assign path hasn't finished but irqfd
has been added to kvm->irqfds.items list, another thead may deassign the
eventfd and free struct kvm_kernel_irqfd(). The assign path then uses
the struct kvm_kernel_irqfd that has been freed by deassign path. To avoid
such issue, keep irqfd under kvm->irq_srcu protection after the irqfd
has been added to kvm->irqfds.items list, and call synchronize_srcu()
in irq_shutdown() to make sure that irqfd has been fully initialized in
the assign path.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Tianyu Lan <tianyu.lan@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/eventfd.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 49001fa84ead..1203829316b2 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(work, struct kvm_kernel_irqfd, shutdown);
+	struct kvm *kvm = irqfd->kvm;
 	u64 cnt;
 
+	/* Make sure irqfd has been initalized in assign path. */
+	synchronize_srcu(&kvm->irq_srcu);
+
 	/*
 	 * Synchronize with the wait-queue and unhook ourselves to prevent
 	 * further events.
@@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irqfd_update(kvm, irqfd);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
@@ -419,6 +422,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 				irqfd->consumer.token, ret);
 #endif
 
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return 0;
 
 fail:

From d0f4cd75aee1d5e55303de347f5a89431480ec7b Mon Sep 17 00:00:00 2001
From: Dewet Thibaut <thibaut.dewet@nokia.com>
Date: Mon, 16 Jul 2018 10:49:27 +0200
Subject: [PATCH 002/519] x86/MCE: Remove min interval polling limitation

commit fbdb328c6bae0a7c78d75734a738b66b86dffc96 upstream.

commit b3b7c4795c ("x86/MCE: Serialize sysfs changes") introduced a min
interval limitation when setting the check interval for polled MCEs.
However, the logic is that 0 disables polling for corrected MCEs, see
Documentation/x86/x86_64/machinecheck. The limitation prevents disabling.

Remove this limitation and allow the value 0 to disable polling again.

Fixes: b3b7c4795c ("x86/MCE: Serialize sysfs changes")
Signed-off-by: Dewet Thibaut <thibaut.dewet@nokia.com>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
[ Massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180716084927.24869-1-alexander.sverdlin@nokia.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ddc9b8125918..7b8c8c838191 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2294,9 +2294,6 @@ static ssize_t store_int_with_restart(struct device *s,
 	if (check_interval == old_check_interval)
 		return ret;
 
-	if (check_interval < 1)
-		check_interval = 1;
-
 	mutex_lock(&mce_sysfs_mutex);
 	mce_restart();
 	mutex_unlock(&mce_sysfs_mutex);

From 0cb6eaf5e5be88ddfda51100adc7149d00a28bb2 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 20 Jul 2018 17:53:42 -0700
Subject: [PATCH 003/519] fat: fix memory allocation failure handling of
 match_strdup()

commit 35033ab988c396ad7bce3b6d24060c16a9066db8 upstream.

In parse_options(), if match_strdup() failed, parse_options() leaves
opts->iocharset in unexpected state (i.e.  still pointing the freed
string).  And this can be the cause of double free.

To fix, this initialize opts->iocharset always when freeing.

Link: http://lkml.kernel.org/r/8736wp9dzc.fsf@mail.parknet.co.jp
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reported-by: syzbot+90b8e10515ae88228a92@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fat/inode.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cf644d52c0cf..c81cfb79a339 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -613,13 +613,21 @@ static void fat_set_state(struct super_block *sb,
 	brelse(bh);
 }
 
+static void fat_reset_iocharset(struct fat_mount_options *opts)
+{
+	if (opts->iocharset != fat_default_iocharset) {
+		/* Note: opts->iocharset can be NULL here */
+		kfree(opts->iocharset);
+		opts->iocharset = fat_default_iocharset;
+	}
+}
+
 static void delayed_free(struct rcu_head *p)
 {
 	struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
 	unload_nls(sbi->nls_disk);
 	unload_nls(sbi->nls_io);
-	if (sbi->options.iocharset != fat_default_iocharset)
-		kfree(sbi->options.iocharset);
+	fat_reset_iocharset(&sbi->options);
 	kfree(sbi);
 }
 
@@ -1034,7 +1042,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 	opts->fs_fmask = opts->fs_dmask = current_umask();
 	opts->allow_utime = -1;
 	opts->codepage = fat_default_codepage;
-	opts->iocharset = fat_default_iocharset;
+	fat_reset_iocharset(opts);
 	if (is_vfat) {
 		opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
 		opts->rodir = 0;
@@ -1184,8 +1192,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 
 		/* vfat specific */
 		case Opt_charset:
-			if (opts->iocharset != fat_default_iocharset)
-				kfree(opts->iocharset);
+			fat_reset_iocharset(opts);
 			iocharset = match_strdup(&args[0]);
 			if (!iocharset)
 				return -ENOMEM;
@@ -1776,8 +1783,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		iput(fat_inode);
 	unload_nls(sbi->nls_io);
 	unload_nls(sbi->nls_disk);
-	if (sbi->options.iocharset != fat_default_iocharset)
-		kfree(sbi->options.iocharset);
+	fat_reset_iocharset(&sbi->options);
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	return error;

From 01b6ca65e10f2669965fbc62440cb9b09a25d086 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 17 Jul 2018 17:26:43 +0200
Subject: [PATCH 004/519] ALSA: rawmidi: Change resized buffers atomically

commit 39675f7a7c7e7702f7d5341f1e0d01db746543a0 upstream.

The SNDRV_RAWMIDI_IOCTL_PARAMS ioctl may resize the buffers and the
current code is racy.  For example, the sequencer client may write to
buffer while it being resized.

As a simple workaround, let's switch to the resized buffer inside the
stream runtime lock.

Reported-by: syzbot+52f83f0ea8df16932f7f@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/rawmidi.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 16f8124b1150..59111cadaec2 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card,
 int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
 			      struct snd_rawmidi_params * params)
 {
-	char *newbuf;
+	char *newbuf, *oldbuf;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 	
 	if (substream->append && substream->use_count > 1)
@@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = krealloc(runtime->buffer, params->buffer_size,
-				  GFP_KERNEL);
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
+		spin_lock_irq(&runtime->lock);
+		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
 		runtime->avail = runtime->buffer_size;
+		runtime->appl_ptr = runtime->hw_ptr = 0;
+		spin_unlock_irq(&runtime->lock);
+		kfree(oldbuf);
 	}
 	runtime->avail_min = params->avail_min;
 	substream->active_sensing = !params->no_active_sensing;
@@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params);
 int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 			     struct snd_rawmidi_params * params)
 {
-	char *newbuf;
+	char *newbuf, *oldbuf;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 
 	snd_rawmidi_drain_input(substream);
@@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = krealloc(runtime->buffer, params->buffer_size,
-				  GFP_KERNEL);
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
+		spin_lock_irq(&runtime->lock);
+		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
+		runtime->appl_ptr = runtime->hw_ptr = 0;
+		spin_unlock_irq(&runtime->lock);
+		kfree(oldbuf);
 	}
 	runtime->avail_min = params->avail_min;
 	return 0;

From b3e0971a733e67d15aa114a4b42b4f4e67618b55 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Thu, 28 Jun 2018 16:59:14 -0700
Subject: [PATCH 005/519] ARC: Fix CONFIG_SWAP

commit 6e3761145a9ba3ce267c330b6bff51cf6a057b06 upstream.

swap was broken on ARC due to silly copy-paste issue.

We encode offset from swapcache page in __swp_entry() as (off << 13) but
were not decoding back in __swp_offset() as (off >> 13) - it was still
(off << 13).

This finally fixes swap usage on ARC.

| # mkswap /dev/sda2
|
| # swapon -a -e /dev/sda2
| Adding 500728k swap on /dev/sda2.  Priority:-2 extents:1 across:500728k
|
| # free
|              total       used       free     shared    buffers     cached
| Mem:        765104      13456     751648       4736          8       4736
| -/+ buffers/cache:       8712     756392
| Swap:       500728          0     500728

Cc: stable@vger.kernel.org
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index e5fec320f158..c07d7b0a4058 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -372,7 +372,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 
 /* Decode a PTE containing swap "identifier "into constituents */
 #define __swp_type(pte_lookalike)	(((pte_lookalike).val) & 0x1f)
-#define __swp_offset(pte_lookalike)	((pte_lookalike).val << 13)
+#define __swp_offset(pte_lookalike)	((pte_lookalike).val >> 13)
 
 /* NOPs, to keep generic kernel happy */
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })

From cfebfe7a80e35b582fd8d7f6400f9a1d10003583 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 11 Jul 2018 10:42:20 -0700
Subject: [PATCH 006/519] ARC: mm: allow mprotect to make stack mappings
 executable

commit 93312b6da4df31e4102ce5420e6217135a16c7ea upstream.

mprotect(EXEC) was failing for stack mappings as default vm flags was
missing MAYEXEC.

This was triggered by glibc test suite nptl/tst-execstack testcase

What is surprising is that despite running LTP for years on, we didn't
catch this issue as it lacks a directed test case.

gcc dejagnu tests with nested functions also requiring exec stack work
fine though because they rely on the GNU_STACK segment spit out by
compiler and handled in kernel elf loader.

This glibc case is different as the stack is non exec to begin with and
a dlopen of shared lib with GNU_STACK segment triggers the exec stack
proceedings using a mprotect(PROT_EXEC) which was broken.

CC: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 429957f1c236..8f1145ed0046 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -102,7 +102,7 @@ typedef pte_t * pgtable_t;
 #define virt_addr_valid(kaddr)  pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
 /* Default Permissions for stack/heaps pages (Non Executable) */
-#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define WANT_PAGE_VIRTUAL   1
 

From 08a0dc770c40f9d28c28d21c4728a329e489a57b Mon Sep 17 00:00:00 2001
From: Jing Xia <jing.xia.mail@gmail.com>
Date: Fri, 20 Jul 2018 17:53:48 -0700
Subject: [PATCH 007/519] mm: memcg: fix use after free in mem_cgroup_iter()

commit 9f15bde671355c351cf20d9f879004b234353100 upstream.

It was reported that a kernel crash happened in mem_cgroup_iter(), which
can be triggered if the legacy cgroup-v1 non-hierarchical mode is used.

Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b8f
......
Call trace:
  mem_cgroup_iter+0x2e0/0x6d4
  shrink_zone+0x8c/0x324
  balance_pgdat+0x450/0x640
  kswapd+0x130/0x4b8
  kthread+0xe8/0xfc
  ret_from_fork+0x10/0x20

  mem_cgroup_iter():
      ......
      if (css_tryget(css))    <-- crash here
	    break;
      ......

The crashing reason is that mem_cgroup_iter() uses the memcg object whose
pointer is stored in iter->position, which has been freed before and
filled with POISON_FREE(0x6b).

And the root cause of the use-after-free issue is that
invalidate_reclaim_iterators() fails to reset the value of iter->position
to NULL when the css of the memcg is released in non- hierarchical mode.

Link: http://lkml.kernel.org/r/1531994807-25639-1-git-send-email-jing.xia@unisoc.com
Fixes: 6df38689e0e9 ("mm: memcontrol: fix possible memcg leak due to interrupted reclaim")
Signed-off-by: Jing Xia <jing.xia.mail@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <chunyan.zhang@unisoc.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 55a9facb8e8d..9a8e688724b1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -996,7 +996,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 	int nid, zid;
 	int i;
 
-	while ((memcg = parent_mem_cgroup(memcg))) {
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
 		for_each_node(nid) {
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				mz = &memcg->nodeinfo[nid]->zoneinfo[zid];

From 5a95ecebc7083533c8ac384146de991c29145aee Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Thu, 5 Jul 2018 18:49:23 +0000
Subject: [PATCH 008/519] ipv4: Return EINVAL when ping_group_range sysctl
 doesn't map to user ns

[ Upstream commit 70ba5b6db96ff7324b8cfc87e0d0383cf59c9677 ]

The low and high values of the net.ipv4.ping_group_range sysctl were
being silently forced to the default disabled state when a write to the
sysctl contained GIDs that didn't map to the associated user namespace.
Confusingly, the sysctl's write operation would return success and then
a subsequent read of the sysctl would indicate that the low and high
values are the overflowgid.

This patch changes the behavior by clearly returning an error when the
sysctl write operation receives a GID range that doesn't map to the
associated user namespace. In such a situation, the previous value of
the sysctl is preserved and that range will be returned in a subsequent
read of the sysctl.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/sysctl_net_ipv4.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 75abf978ef30..da90c74d12ef 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -141,8 +141,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
 	if (write && ret == 0) {
 		low = make_kgid(user_ns, urange[0]);
 		high = make_kgid(user_ns, urange[1]);
-		if (!gid_valid(low) || !gid_valid(high) ||
-		    (urange[1] < urange[0]) || gid_lt(high, low)) {
+		if (!gid_valid(low) || !gid_valid(high))
+			return -EINVAL;
+		if (urange[1] < urange[0] || gid_lt(high, low)) {
 			low = make_kgid(&init_user_ns, 1);
 			high = make_kgid(&init_user_ns, 0);
 		}

From 2be7797acd1bb9ec30920b2aac29e474184ede4e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 17:12:39 +0100
Subject: [PATCH 009/519] ipv6: fix useless rol32 call on hash

[ Upstream commit 169dc027fb02492ea37a0575db6a658cf922b854 ]

The rol32 call is currently rotating hash but the rol'd value is
being discarded. I believe the current code is incorrect and hash
should be assigned the rotated value returned from rol32.

Thanks to David Lebrun for spotting this.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 84f0d0602433..0e01d570fa22 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -762,7 +762,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 	 * to minimize possbility that any useful information to an
 	 * attacker is leaked. Only lower 20 bits are relevant.
 	 */
-	rol32(hash, 16);
+	hash = rol32(hash, 16);
 
 	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 

From b67a684222441668cf326427d02c34a8dfedb6be Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 16 Jul 2018 13:26:13 -0700
Subject: [PATCH 010/519] lib/rhashtable: consider param->min_size when setting
 initial table size

[ Upstream commit 107d01f5ba10f4162c38109496607eb197059064 ]

rhashtable_init() currently does not take into account the user-passed
min_size parameter unless param->nelem_hint is set as well. As such,
the default size (number of buckets) will always be HASH_DEFAULT_SIZE
even if the smallest allowed size is larger than that. Remediate this
by unconditionally calling into rounded_hashtable_size() and handling
things accordingly.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/rhashtable.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 51282f579760..37ea94b636a3 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -670,8 +670,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
 static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
-	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   (unsigned long)params->min_size);
+	size_t retsize;
+
+	if (params->nelem_hint)
+		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+			      (unsigned long)params->min_size);
+	else
+		retsize = max(HASH_DEFAULT_SIZE,
+			      (unsigned long)params->min_size);
+
+	return retsize;
 }
 
 static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
@@ -728,8 +736,6 @@ int rhashtable_init(struct rhashtable *ht,
 	struct bucket_table *tbl;
 	size_t size;
 
-	size = HASH_DEFAULT_SIZE;
-
 	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
@@ -756,8 +762,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
-	if (params->nelem_hint)
-		size = rounded_hashtable_size(&ht->p);
+	size = rounded_hashtable_size(&ht->p);
 
 	/* The maximum (not average) chain length grows with the
 	 * size of the hash table, at a rate of (log N)/(log log N).

From be64f9f7a253184b733072ccd69a90350e86c46d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 7 Jul 2018 16:15:26 -0700
Subject: [PATCH 011/519] net/ipv4: Set oif in fib_compute_spec_dst

[ Upstream commit e7372197e15856ec4ee66b668020a662994db103 ]

Xin reported that icmp replies may not use the address on the device the
echo request is received if the destination address is broadcast. Instead
a route lookup is done without considering VRF context. Fix by setting
oif in flow struct to the master device if it is enslaved. That directs
the lookup to the VRF table. If the device is not enslaved, oif is still
0 so no affect.

Fixes: cd2fbe1b6b51 ("net: Use VRF device index for lookups on RX")
Reported-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_frontend.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c9e68ff48a72..8f05816a8be2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -297,6 +297,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
 		struct flowi4 fl4 = {
 			.flowi4_iif = LOOPBACK_IFINDEX,
+			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
 			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
 			.flowi4_scope = scope,

From 92b0c8dd9ea76cf215b1f740d06f8de430326289 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 3 Jul 2018 22:34:54 +0200
Subject: [PATCH 012/519] net: phy: fix flag masking in __set_phy_supported

[ Upstream commit df8ed346d4a806a6eef2db5924285e839604b3f9 ]

Currently also the pause flags are removed from phydev->supported because
they're not included in PHY_DEFAULT_FEATURES. I don't think this is
intended, especially when considering that this function can be called
via phy_set_max_speed() anywhere in a driver. Change the masking to mask
out only the values we're going to change. In addition remove the
misleading comment, job of this small function is just to adjust the
supported and advertised speeds.

Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/phy_device.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8179727d3423..1f2f25a71d18 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1265,11 +1265,8 @@ static int gen10g_resume(struct phy_device *phydev)
 
 static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
-	/* The default values for phydev->supported are provided by the PHY
-	 * driver "features" member, we want to reset to sane defaults first
-	 * before supporting higher speeds.
-	 */
-	phydev->supported &= PHY_DEFAULT_FEATURES;
+	phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
+			       PHY_10BT_FEATURES);
 
 	switch (max_speed) {
 	default:

From 67aaf36e0da9b5008d6732520bcb4046f0cf8962 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 17 Jul 2018 20:17:33 -0500
Subject: [PATCH 013/519] ptp: fix missing break in switch

[ Upstream commit 9ba8376ce1e2cbf4ce44f7e4bee1d0648e10d594 ]

It seems that a *break* is missing in order to avoid falling through
to the default case. Otherwise, checking *chan* makes no sense.

Fixes: 72df7a7244c0 ("ptp: Allow reassigning calibration pin function")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ptp/ptp_chardev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index da7bae991552..d877ff124365 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -88,6 +88,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
 	case PTP_PF_PHYSYNC:
 		if (chan != 0)
 			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}

From 8a82aee2d2f349afbfaee3754af7cf40c16c16a8 Mon Sep 17 00:00:00 2001
From: Sanjeev Bansal <sanjeevb.bansal@broadcom.com>
Date: Mon, 16 Jul 2018 11:13:32 +0530
Subject: [PATCH 014/519] tg3: Add higher cpu clock for 5762.

[ Upstream commit 3a498606bb04af603a46ebde8296040b2de350d1 ]

This patch has fix for TX timeout while running bi-directional
traffic with 100 Mbps using 5762.

Signed-off-by: Sanjeev Bansal <sanjeevb.bansal@broadcom.com>
Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/tg3.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 1325825d5225..ce3a56bea6e6 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -9278,6 +9278,15 @@ static int tg3_chip_reset(struct tg3 *tp)
 
 	tg3_restore_clk(tp);
 
+	/* Increase the core clock speed to fix tx timeout issue for 5762
+	 * with 100Mbps link speed.
+	 */
+	if (tg3_asic_rev(tp) == ASIC_REV_5762) {
+		val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE);
+		tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val |
+		     TG3_CPMU_MAC_ORIDE_ENABLE);
+	}
+
 	/* Reprobe ASF enable state.  */
 	tg3_flag_clear(tp, ENABLE_ASF);
 	tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK |

From d629be850ac6e296dfe156604d7bb5202f1613da Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Wed, 11 Jul 2018 14:39:42 +0200
Subject: [PATCH 015/519] net: Don't copy pfmemalloc flag in
 __copy_skb_header()

[ Upstream commit 8b7008620b8452728cadead460a36f64ed78c460 ]

The pfmemalloc flag indicates that the skb was allocated from
the PFMEMALLOC reserves, and the flag is currently copied on skb
copy and clone.

However, an skb copied from an skb flagged with pfmemalloc
wasn't necessarily allocated from PFMEMALLOC reserves, and on
the other hand an skb allocated that way might be copied from an
skb that wasn't.

So we should not copy the flag on skb copy, and rather decide
whether to allow an skb to be associated with sockets unrelated
to page reclaim depending only on how it was allocated.

Move the pfmemalloc flag before headers_start[0] using an
existing 1-bit hole, so that __copy_skb_header() doesn't copy
it.

When cloning, we'll now take care of this flag explicitly,
contravening to the warning comment of __skb_clone().

While at it, restore the newline usage introduced by commit
b19372273164 ("net: reorganize sk_buff for faster
__copy_skb_header()") to visually separate bytes used in
bitfields after headers_start[0], that was gone after commit
a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage
area"), and describe the pfmemalloc flag in the kernel-doc
structure comment.

This doesn't change the size of sk_buff or cacheline boundaries,
but consolidates the 15 bits hole before tc_index into a 2 bytes
hole before csum, that could now be filled more easily.

Reported-by: Patrick Talbert <ptalbert@redhat.com>
Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/skbuff.h | 12 ++++++------
 net/core/skbuff.c      |  2 ++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a6da214d0584..c28bd8be290a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -514,6 +514,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
+ *	@pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -594,8 +595,8 @@ struct sk_buff {
 				fclone:2,
 				peeked:1,
 				head_frag:1,
-				xmit_more:1;
-	/* one bit hole */
+				xmit_more:1,
+				pfmemalloc:1;
 	kmemcheck_bitfield_end(flags1);
 
 	/* fields enclosed in headers_start/headers_end are copied
@@ -615,19 +616,18 @@ struct sk_buff {
 
 	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3;
-	__u8			pfmemalloc:1;
 	__u8			ignore_df:1;
 	__u8			nfctinfo:3;
-
 	__u8			nf_trace:1;
+
 	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
 	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
-
 	__u8			no_fcs:1;
+
 	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
@@ -635,11 +635,11 @@ struct sk_buff {
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
 	__u8			csum_bad:1;
-
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
 	__u8			ipvs_property:1;
+
 	__u8			inner_protocol_type:1;
 	__u8			remcsum_offload:1;
 	/* 3 or 5 bit hole */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fa02c680eebc..60ad04039d2a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -828,6 +828,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->cloned = 1;
 	n->nohdr = 0;
 	n->peeked = 0;
+	if (skb->pfmemalloc)
+		n->pfmemalloc = 1;
 	n->destructor = NULL;
 	C(tail);
 	C(end);

From 80a80f51cc3aeea3d7d9d2b859357f611de7a87d Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 13 Jul 2018 13:21:07 +0200
Subject: [PATCH 016/519] skbuff: Unconditionally copy pfmemalloc in
 __skb_clone()

[ Upstream commit e78bfb0751d4e312699106ba7efbed2bab1a53ca ]

Commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in
__copy_skb_header()") introduced a different handling for the
pfmemalloc flag in copy and clone paths.

In __skb_clone(), now, the flag is set only if it was set in the
original skb, but not cleared if it wasn't. This is wrong and
might lead to socket buffers being flagged with pfmemalloc even
if the skb data wasn't allocated from pfmemalloc reserves. Copy
the flag instead of ORing it.

Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Fixes: 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_header()")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Tested-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 60ad04039d2a..55be076706e5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -828,8 +828,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->cloned = 1;
 	n->nohdr = 0;
 	n->peeked = 0;
-	if (skb->pfmemalloc)
-		n->pfmemalloc = 1;
+	C(pfmemalloc);
 	n->destructor = NULL;
 	C(tail);
 	C(end);

From fce27138ceeb47c2644d1bdabc36dfc4cf025e83 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 21 Jun 2018 16:19:41 +0300
Subject: [PATCH 017/519] xhci: Fix perceived dead host due to runtime suspend
 race with event handler

commit 229bc19fd7aca4f37964af06e3583c1c8f36b5d6 upstream.

Don't rely on event interrupt (EINT) bit alone to detect pending port
change in resume. If no change event is detected the host may be suspended
again, oterwise roothubs are resumed.

There is a lag in xHC setting EINT. If we don't notice the pending change
in resume, and the controller is runtime suspeded again, it causes the
event handler to assume host is dead as it will fail to read xHC registers
once PCI puts the controller to D3 state.

[  268.520969] xhci_hcd: xhci_resume: starting port polling.
[  268.520985] xhci_hcd: xhci_hub_status_data: stopping port polling.
[  268.521030] xhci_hcd: xhci_suspend: stopping port polling.
[  268.521040] xhci_hcd: // Setting command ring address to 0x349bd001
[  268.521139] xhci_hcd: Port Status Change Event for port 3
[  268.521149] xhci_hcd: resume root hub
[  268.521163] xhci_hcd: port resume event for port 3
[  268.521168] xhci_hcd: xHC is not running.
[  268.521174] xhci_hcd: handle_port_status: starting port polling.
[  268.596322] xhci_hcd: xhci_hc_died: xHCI host controller not responding, assume dead

The EINT lag is described in a additional note in xhci specs 4.19.2:

"Due to internal xHC scheduling and system delays, there will be a lag
between a change bit being set and the Port Status Change Event that it
generated being written to the Event Ring. If SW reads the PORTSC and
sees a change bit set, there is no guarantee that the corresponding Port
Status Change Event has already been written into the Event Ring."

Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++++++++++++++++---
 drivers/usb/host/xhci.h |  4 ++++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f2e9f59c90d6..2d837b6bd495 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -887,6 +887,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 	spin_unlock_irqrestore(&xhci->lock, flags);
 }
 
+static bool xhci_pending_portevent(struct xhci_hcd *xhci)
+{
+	__le32 __iomem		**port_array;
+	int			port_index;
+	u32			status;
+	u32			portsc;
+
+	status = readl(&xhci->op_regs->status);
+	if (status & STS_EINT)
+		return true;
+	/*
+	 * Checking STS_EINT is not enough as there is a lag between a change
+	 * bit being set and the Port Status Change Event that it generated
+	 * being written to the Event Ring. See note in xhci 1.1 section 4.19.2.
+	 */
+
+	port_index = xhci->num_usb2_ports;
+	port_array = xhci->usb2_ports;
+	while (port_index--) {
+		portsc = readl(port_array[port_index]);
+		if (portsc & PORT_CHANGE_MASK ||
+		    (portsc & PORT_PLS_MASK) == XDEV_RESUME)
+			return true;
+	}
+	port_index = xhci->num_usb3_ports;
+	port_array = xhci->usb3_ports;
+	while (port_index--) {
+		portsc = readl(port_array[port_index]);
+		if (portsc & PORT_CHANGE_MASK ||
+		    (portsc & PORT_PLS_MASK) == XDEV_RESUME)
+			return true;
+	}
+	return false;
+}
+
 /*
  * Stop HC (not bus-specific)
  *
@@ -983,7 +1018,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend);
  */
 int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 {
-	u32			command, temp = 0, status;
+	u32			command, temp = 0;
 	struct usb_hcd		*hcd = xhci_to_hcd(xhci);
 	struct usb_hcd		*secondary_hcd;
 	int			retval = 0;
@@ -1105,8 +1140,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
  done:
 	if (retval == 0) {
 		/* Resume root hubs only when have pending events. */
-		status = readl(&xhci->op_regs->status);
-		if (status & STS_EINT) {
+		if (xhci_pending_portevent(xhci)) {
 			usb_hcd_resume_root_hub(xhci->shared_hcd);
 			usb_hcd_resume_root_hub(hcd);
 		}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 1715705acc59..84d8871755b7 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -382,6 +382,10 @@ struct xhci_op_regs {
 #define PORT_PLC	(1 << 22)
 /* port configure error change - port failed to configure its link partner */
 #define PORT_CEC	(1 << 23)
+#define PORT_CHANGE_MASK	(PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \
+				 PORT_RC | PORT_PLC | PORT_CEC)
+
+
 /* Cold Attach Status - xHC can set this bit to report device attached during
  * Sx state. Warm port reset should be perfomed to clear this bit and move port
  * to connected state.

From f868639bf8896908ad45adf1e7c1f786bb3568cc Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:24 -0700
Subject: [PATCH 018/519] x86/paravirt: Make native_save_fl() extern inline

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

[Backport for 4.4. 4.4 is missing commit 784d5699eddc "x86: move exports to
actual definitions" which doesn't apply cleanly, and not really worth
backporting IMO. It's simpler to change this patch from upstream:
  + #include <asm-generic/export.h>
rather than
  + #include <asm/export.h>]

Debugged-by: Alistair Strachan <astrachan@google.com>
Debugged-by: Matthias Kaehlcke <mka@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Tom Stellar <tstellar@redhat.com>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/irqflags.h |  2 +-
 arch/x86/kernel/Makefile        |  1 +
 arch/x86/kernel/irqflags.S      | 26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/irqflags.S

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b77f5edb03b0..0056bc945cd1 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -8,7 +8,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ffe01d0..7947cee61f61 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -41,6 +41,7 @@ obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
new file mode 100644
index 000000000000..3817eb748eb4
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm-generic/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)

From 8cee8b4cdd50c5f90f8c63b63bcfba6d1f3839b7 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:29:15 -0700
Subject: [PATCH 019/519] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf

(cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f)

This is a pure feature bits leaf. There are two AVX512 feature bits in it
already which were handled as scattered bits, and three more from this leaf
are going to be added for speculation control features.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: dave.hansen@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeature.h        | 7 +++++--
 arch/x86/include/asm/cpufeatures.h       | 6 +++++-
 arch/x86/include/asm/disabled-features.h | 3 ++-
 arch/x86/include/asm/required-features.h | 3 ++-
 arch/x86/kernel/cpu/common.c             | 1 +
 5 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index dd0089841a0f..d72c1db64679 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@ enum cpuid_leafs
 	CPUID_8000_000A_EDX,
 	CPUID_7_ECX,
 	CPUID_8000_0007_EBX,
+	CPUID_7_EDX,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||	\
 	   REQUIRED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
 
 #define DISABLED_MASK_BIT_SET(feature_bit)				\
 	 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||	\
 	   DISABLED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
 
 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 205ce70c1d6c..da14ca894a15 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
+#define NCAPINTS	19	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -285,6 +285,10 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 21c5ac15657b..1f8cca459c6c 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -59,6 +59,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c0abe9..6847d85400a8 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 736e2843139b..ac7c52682d33 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -695,6 +695,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
 		c->x86_capability[CPUID_7_0_EBX] = ebx;
 		c->x86_capability[CPUID_7_ECX] = ecx;
+		c->x86_capability[CPUID_7_EDX] = edx;
 	}
 
 	/* Extended state features: level 0x0000000d */

From 7169b43e7c68edd550efa812c295685947ffa8a0 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:29:24 -0700
Subject: [PATCH 020/519] x86/cpufeatures: Add Intel feature bits for
 Speculation Control

(cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61)

Add three feature bits exposed by new microcode on Intel CPUs for
speculation control.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: dave.hansen@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index da14ca894a15..f50e8576eb3b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,9 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_STIBP		(18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
 
 /*
  * BUG word(s)

From b00f820b5143a2fc0a9c859a52be2ef2244834ba Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:29:33 -0700
Subject: [PATCH 021/519] x86/cpufeatures: Add AMD feature bits for Speculation
 Control

(cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7)

AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel.
See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: dave.hansen@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f50e8576eb3b..a5671b849837 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -251,6 +251,9 @@
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */
+#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */
+#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */

From 4fbcf1a84d8ad1bf15937fa6f9623045da153b4e Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:29:43 -0700
Subject: [PATCH 022/519] x86/msr: Add definitions for new speculation control
 MSRs

(cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410)

Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES.

See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: dave.hansen@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b8911aecf035..f4701f0e613a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -32,6 +32,13 @@
 #define EFER_FFXSR		(1<<_EFER_FFXSR)
 
 /* Intel MSRs. Some also available on other CPUs */
+#define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
+#define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
+
+#define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
+#define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
+
 #define MSR_IA32_PERFCTR0		0x000000c1
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd
@@ -45,6 +52,11 @@
 #define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
 
 #define MSR_MTRRcap			0x000000fe
+
+#define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
+#define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
+
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
 

From 4ef0c99359c55ce60ba3859eb615c36c6cbc392c Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:29:52 -0700
Subject: [PATCH 023/519] x86/pti: Do not enable PTI on CPUs which are not
 vulnerable to Meltdown

(cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621)

Also, for CPUs which don't speculate at all, don't report that they're
vulnerable to the Spectre variants either.

Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it
for now, even though that could be done with a simple comparison, on the
assumption that we'll have more to add.

Based on suggestions from Dave Hansen and Alan Cox.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ac7c52682d33..d6c097cdbefb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -43,6 +43,8 @@
 #include <asm/pat.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -794,6 +796,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #endif
 }
 
+static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
+	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CEDARVIEW,	X86_FEATURE_ANY },
+	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CLOVERVIEW,	X86_FEATURE_ANY },
+	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_LINCROFT,	X86_FEATURE_ANY },
+	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_PENWELL,	X86_FEATURE_ANY },
+	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_PINEVIEW,	X86_FEATURE_ANY },
+	{ X86_VENDOR_CENTAUR,	5 },
+	{ X86_VENDOR_INTEL,	5 },
+	{ X86_VENDOR_NSC,	5 },
+	{ X86_VENDOR_ANY,	4 },
+	{}
+};
+
+static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
+	{ X86_VENDOR_AMD },
+	{}
+};
+
+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+{
+	u64 ia32_cap = 0;
+
+	if (x86_match_cpu(cpu_no_meltdown))
+		return false;
+
+	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+	/* Rogue Data Cache Load? No! */
+	if (ia32_cap & ARCH_CAP_RDCL_NO)
+		return false;
+
+	return true;
+}
+
 /*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -840,11 +877,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
-	if (c->x86_vendor != X86_VENDOR_AMD)
-		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
-
-	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+	if (!x86_match_cpu(cpu_no_speculation)) {
+		if (cpu_vulnerable_to_meltdown(c))
+			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+		setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+	}
 
 	fpu__init_system(c);
 

From c64410cf4d3abd6c9f5abdd38db0a855926304c5 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:30:01 -0700
Subject: [PATCH 024/519] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early
 Spectre v2 microcodes

(cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035)

This doesn't refuse to load the affected microcodes; it just refuses to
use the Spectre v2 mitigation features if they're detected, by clearing
the appropriate feature bits.

The AMD CPUID bits are handled here too, because hypervisors *may* have
been exposing those bits even on Intel chips, for fine-grained control
of what's available.

It is non-trivial to use x86_match_cpu() for this table because that
doesn't handle steppings. And the approach taken in commit bd9240a18
almost made me lose my lunch.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: dave.hansen@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/intel-family.h |  5 ++-
 arch/x86/kernel/cpu/intel.c         | 67 +++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 6999f7d01a0d..12fa187865c2 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -12,6 +12,7 @@
  */
 
 #define INTEL_FAM6_CORE_YONAH		0x0E
+
 #define INTEL_FAM6_CORE2_MEROM		0x0F
 #define INTEL_FAM6_CORE2_MEROM_L	0x16
 #define INTEL_FAM6_CORE2_PENRYN		0x17
@@ -20,6 +21,7 @@
 #define INTEL_FAM6_NEHALEM		0x1E
 #define INTEL_FAM6_NEHALEM_EP		0x1A
 #define INTEL_FAM6_NEHALEM_EX		0x2E
+
 #define INTEL_FAM6_WESTMERE		0x25
 #define INTEL_FAM6_WESTMERE2		0x1F
 #define INTEL_FAM6_WESTMERE_EP		0x2C
@@ -36,9 +38,9 @@
 #define INTEL_FAM6_HASWELL_GT3E		0x46
 
 #define INTEL_FAM6_BROADWELL_CORE	0x3D
-#define INTEL_FAM6_BROADWELL_XEON_D	0x56
 #define INTEL_FAM6_BROADWELL_GT3E	0x47
 #define INTEL_FAM6_BROADWELL_X		0x4F
+#define INTEL_FAM6_BROADWELL_XEON_D	0x56
 
 #define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
 #define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E
@@ -60,6 +62,7 @@
 #define INTEL_FAM6_ATOM_MERRIFIELD2	0x5A /* Annidale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
+#define INTEL_FAM6_ATOM_GEMINI_LAKE	0x7A
 
 /* Xeon Phi */
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9299e3bdfad6..23ba9cc0cc0d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
 #include <asm/msr.h>
 #include <asm/bugs.h>
 #include <asm/cpu.h>
+#include <asm/intel-family.h>
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
@@ -25,6 +26,59 @@
 #include <asm/apic.h>
 #endif
 
+/*
+ * Early microcode releases for the Spectre v2 mitigation were broken.
+ * Information taken from;
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://kb.vmware.com/s/article/52345
+ * - Microcode revisions observed in the wild
+ * - Release note from 20180108 microcode release
+ */
+struct sku_microcode {
+	u8 model;
+	u8 stepping;
+	u32 microcode;
+};
+static const struct sku_microcode spectre_bad_microcodes[] = {
+	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x84 },
+	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x84 },
+	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x84 },
+	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x84 },
+	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x84 },
+	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
+	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
+	{ INTEL_FAM6_SKYLAKE_MOBILE,	0x03,	0xc2 },
+	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },
+	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },
+	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },
+	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 },
+	{ INTEL_FAM6_BROADWELL_XEON_D,	0x03,	0x07000011 },
+	{ INTEL_FAM6_BROADWELL_X,	0x01,	0x0b000025 },
+	{ INTEL_FAM6_HASWELL_ULT,	0x01,	0x21 },
+	{ INTEL_FAM6_HASWELL_GT3E,	0x01,	0x18 },
+	{ INTEL_FAM6_HASWELL_CORE,	0x03,	0x23 },
+	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },
+	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },
+	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a },
+	/* Updated in the 20180108 release; blacklist until we know otherwise */
+	{ INTEL_FAM6_ATOM_GEMINI_LAKE,	0x01,	0x22 },
+	/* Observed in the wild */
+	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },
+	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 },
+};
+
+static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+		if (c->x86_model == spectre_bad_microcodes[i].model &&
+		    c->x86_mask == spectre_bad_microcodes[i].stepping)
+			return (c->microcode <= spectre_bad_microcodes[i].microcode);
+	}
+	return false;
+}
+
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
 	u64 misc_enable;
@@ -51,6 +105,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
 	}
 
+	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+	     cpu_has(c, X86_FEATURE_STIBP) ||
+	     cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) ||
+	     cpu_has(c, X86_FEATURE_AMD_PRED_CMD) ||
+	     cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) {
+		pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n");
+		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+		clear_cpu_cap(c, X86_FEATURE_STIBP);
+		clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL);
+		clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);
+		clear_cpu_cap(c, X86_FEATURE_AMD_STIBP);
+	}
+
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
 	 *

From 5ff6b14190322e92489254dc4d10c28f203ee5fc Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:30:10 -0700
Subject: [PATCH 025/519] x86/speculation: Add basic IBPB (Indirect Branch
 Prediction Barrier) support

(cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d)

Expose indirect_branch_prediction_barrier() for use in subsequent patches.

[ tglx: Add IBPB status to spectre_v2 sysfs file ]

Co-developed-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: ak@linux.intel.com
Cc: ashok.raj@intel.com
Cc: dave.hansen@intel.com
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h   |  2 ++
 arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++
 arch/x86/kernel/cpu/bugs.c           | 10 +++++++++-
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a5671b849837..b4e370b5b761 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,6 +201,8 @@
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
 
+#define X86_FEATURE_IBPB		( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/
+
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 8b910416243c..41851afd44af 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -194,6 +194,19 @@ static inline void vmexit_fill_RSB(void)
 #endif
 }
 
+static inline void indirect_branch_prediction_barrier(void)
+{
+	asm volatile(ALTERNATIVE("",
+				 "movl %[msr], %%ecx\n\t"
+				 "movl %[val], %%eax\n\t"
+				 "movl $0, %%edx\n\t"
+				 "wrmsr",
+				 X86_FEATURE_IBPB)
+		     : : [msr] "i" (MSR_IA32_PRED_CMD),
+			 [val] "i" (PRED_CMD_IBPB)
+		     : "eax", "ecx", "edx", "memory");
+}
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2bbc74f8a4a8..7def33ada730 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -296,6 +296,13 @@ static void __init spectre_v2_select_mitigation(void)
 		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
 		pr_info("Filling RSB on context switch\n");
 	}
+
+	/* Initialize Indirect Branch Prediction Barrier if supported */
+	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+	    boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
+		setup_force_cpu_cap(X86_FEATURE_IBPB);
+		pr_info("Enabling Indirect Branch Prediction Barrier\n");
+	}
 }
 
 #undef pr_fmt
@@ -325,7 +332,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
 		return sprintf(buf, "Not affected\n");
 
-	return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+		       boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "",
 		       spectre_v2_module_string());
 }
 #endif

From 9a016c16d87fef47ad24ce8a9f30e8fce030225e Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:30:20 -0700
Subject: [PATCH 026/519] x86/cpufeatures: Clean up Spectre v2 related CPUID
 flags

(cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2)

We want to expose the hardware features simply in /proc/cpuinfo as "ibrs",
"ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them
as the user-visible bits.

When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB
capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP
bit is set, set the AMD STIBP that's used for the generic hardware
capability.

Hide the rest from /proc/cpuinfo by putting "" in the comments. Including
RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are
patches to make the sysfs vulnerabilities information non-readable by
non-root, and the same should apply to all information about which
mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo.

The feature bit for whether IBPB is actually used, which is needed for
ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB.

Originally-by: Borislav Petkov <bp@suse.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ak@linux.intel.com
Cc: dave.hansen@intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: pbonzini@redhat.com
Cc: tim.c.chen@linux.intel.com
Cc: gregkh@linux-foundation.org
Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h   | 18 ++++++++--------
 arch/x86/include/asm/nospec-branch.h |  2 +-
 arch/x86/kernel/cpu/bugs.c           |  7 +++----
 arch/x86/kernel/cpu/intel.c          | 31 +++++++++++++++++++---------
 4 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b4e370b5b761..782005d7eb48 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -194,14 +194,14 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* "" Fill RSB on context switches */
 
-#define X86_FEATURE_RETPOLINE	( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE	( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
 
-#define X86_FEATURE_IBPB		( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/
+#define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -253,9 +253,9 @@
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */
-#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */
-#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */
+#define X86_FEATURE_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
@@ -293,8 +293,8 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_STIBP		(18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
 
 /*
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 41851afd44af..8dcecb912365 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -201,7 +201,7 @@ static inline void indirect_branch_prediction_barrier(void)
 				 "movl %[val], %%eax\n\t"
 				 "movl $0, %%edx\n\t"
 				 "wrmsr",
-				 X86_FEATURE_IBPB)
+				 X86_FEATURE_USE_IBPB)
 		     : : [msr] "i" (MSR_IA32_PRED_CMD),
 			 [val] "i" (PRED_CMD_IBPB)
 		     : "eax", "ecx", "edx", "memory");
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7def33ada730..1968baf66c48 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -298,9 +298,8 @@ static void __init spectre_v2_select_mitigation(void)
 	}
 
 	/* Initialize Indirect Branch Prediction Barrier if supported */
-	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
-	    boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
-		setup_force_cpu_cap(X86_FEATURE_IBPB);
+	if (boot_cpu_has(X86_FEATURE_IBPB)) {
+		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
 		pr_info("Enabling Indirect Branch Prediction Barrier\n");
 	}
 }
@@ -333,7 +332,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
 		return sprintf(buf, "Not affected\n");
 
 	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
-		       boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "",
+		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
 		       spectre_v2_module_string());
 }
 #endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 23ba9cc0cc0d..fee94ee8efe7 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -105,17 +105,28 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
 	}
 
-	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
-	     cpu_has(c, X86_FEATURE_STIBP) ||
-	     cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) ||
-	     cpu_has(c, X86_FEATURE_AMD_PRED_CMD) ||
-	     cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) {
-		pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n");
-		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+	/*
+	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+	 * and they also have a different bit for STIBP support. Also,
+	 * a hypervisor might have set the individual AMD bits even on
+	 * Intel CPUs, for finer-grained selection of what's available.
+	 */
+	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+		set_cpu_cap(c, X86_FEATURE_IBRS);
+		set_cpu_cap(c, X86_FEATURE_IBPB);
+	}
+	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+		set_cpu_cap(c, X86_FEATURE_STIBP);
+
+	/* Now if any of them are set, check the blacklist and clear the lot */
+	if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+		clear_cpu_cap(c, X86_FEATURE_IBRS);
+		clear_cpu_cap(c, X86_FEATURE_IBPB);
 		clear_cpu_cap(c, X86_FEATURE_STIBP);
-		clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL);
-		clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);
-		clear_cpu_cap(c, X86_FEATURE_AMD_STIBP);
+		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+		clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
 	}
 
 	/*

From 769b27207746415f530615a0f4faca12c432bbc4 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:30:29 -0700
Subject: [PATCH 027/519] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature
 bits on Intel

(cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b)

Despite the fact that all the other code there seems to be doing it, just
using set_cpu_cap() in early_intel_init() doesn't actually work.

For CPUs with PKU support, setup_pku() calls get_cpu_cap() after
c->c_init() has set those feature bits. That resets those bits back to what
was queried from the hardware.

Turning the bits off for bad microcode is easy to fix. That can just use
setup_clear_cpu_cap() to force them off for all CPUs.

I was less keen on forcing the feature bits *on* that way, just in case
of inconsistencies. I appreciate that the kernel is going to get this
utterly wrong if CPU features are not consistent, because it has already
applied alternatives by the time secondary CPUs are brought up.

But at least if setup_force_cpu_cap() isn't being used, we might have a
chance of *detecting* the lack of the corresponding bit and either
panicking or refusing to bring the offending CPU online.

So ensure that the appropriate feature bits are set within get_cpu_cap()
regardless of how many extra times it's called.

Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: karahmed@amazon.de
Cc: peterz@infradead.org
Cc: bp@alien8.de
Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++
 arch/x86/kernel/cpu/intel.c  | 27 ++++++++-------------------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d6c097cdbefb..72d7e5a3f5d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -676,6 +676,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 	}
 }
 
+static void init_speculation_control(struct cpuinfo_x86 *c)
+{
+	/*
+	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+	 * and they also have a different bit for STIBP support. Also,
+	 * a hypervisor might have set the individual AMD bits even on
+	 * Intel CPUs, for finer-grained selection of what's available.
+	 *
+	 * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
+	 * features, which are visible in /proc/cpuinfo and used by the
+	 * kernel. So set those accordingly from the Intel bits.
+	 */
+	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+		set_cpu_cap(c, X86_FEATURE_IBRS);
+		set_cpu_cap(c, X86_FEATURE_IBPB);
+	}
+	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+		set_cpu_cap(c, X86_FEATURE_STIBP);
+}
+
 void get_cpu_cap(struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
@@ -768,6 +788,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
 	init_scattered_cpuid_features(c);
+	init_speculation_control(c);
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fee94ee8efe7..0f1318901777 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -105,28 +105,17 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
 	}
 
-	/*
-	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
-	 * and they also have a different bit for STIBP support. Also,
-	 * a hypervisor might have set the individual AMD bits even on
-	 * Intel CPUs, for finer-grained selection of what's available.
-	 */
-	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
-		set_cpu_cap(c, X86_FEATURE_IBRS);
-		set_cpu_cap(c, X86_FEATURE_IBPB);
-	}
-	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
-		set_cpu_cap(c, X86_FEATURE_STIBP);
-
 	/* Now if any of them are set, check the blacklist and clear the lot */
-	if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+	     cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
+	     cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
 	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
 		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
-		clear_cpu_cap(c, X86_FEATURE_IBRS);
-		clear_cpu_cap(c, X86_FEATURE_IBPB);
-		clear_cpu_cap(c, X86_FEATURE_STIBP);
-		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
-		clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
+		setup_clear_cpu_cap(X86_FEATURE_IBRS);
+		setup_clear_cpu_cap(X86_FEATURE_IBPB);
+		setup_clear_cpu_cap(X86_FEATURE_STIBP);
+		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
 	}
 
 	/*

From 1aae84c2807e6ee1358725aee9eabf1137d055e6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 14 Jul 2018 02:30:37 -0700
Subject: [PATCH 028/519] x86/pti: Mark constant arrays as __initconst

(cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e)

I'm seeing build failures from the two newly introduced arrays that
are marked 'const' and '__initdata', which are mutually exclusive:

arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init'
arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init'

The correct annotation is __initconst.

Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 72d7e5a3f5d4..48499b41351c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -817,7 +817,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #endif
 }
 
-static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CEDARVIEW,	X86_FEATURE_ANY },
 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CLOVERVIEW,	X86_FEATURE_ANY },
 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_LINCROFT,	X86_FEATURE_ANY },
@@ -830,7 +830,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
 	{}
 };
 
-static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
 	{ X86_VENDOR_AMD },
 	{}
 };

From 659cc61a987662a6674022d5980a5b5eb0a9b4fe Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Sat, 14 Jul 2018 02:30:46 -0700
Subject: [PATCH 029/519] x86/asm/entry/32: Simplify pushes of zeroed
 pt_regs->REGs

commit 778843f934e362ed4ed734520f60a44a78a074b4 upstream

Use of a temporary R8 register here seems to be unnecessary.

"push %r8" is a two-byte insn (it needs REX prefix to specify R8),
"push $0" is two-byte too. It seems just using the latter would be
no worse.

Thus, code had an unnecessary "xorq %r8,%r8" insn.
It probably costs nothing in execution time here since we are probably
limited by store bandwidth at this point, but still.

Run-tested under QEMU: 32-bit calls still work:

 / # ./test_syscall_vdso32
 [RUN]	Executing 6-argument 32-bit syscall via VDSO
 [OK]	Arguments are preserved across syscall
 [NOTE]	R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn
 [OK]	R8..R15 did not leak kernel data
 [RUN]	Executing 6-argument 32-bit syscall via INT 80
 [OK]	Arguments are preserved across syscall
 [OK]	R8..R15 did not leak kernel data
 [RUN]	Running tests under ptrace
 [RUN]	Executing 6-argument 32-bit syscall via VDSO
 [OK]	Arguments are preserved across syscall
 [NOTE]	R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn
 [OK]	R8..R15 did not leak kernel data
 [RUN]	Executing 6-argument 32-bit syscall via INT 80
 [OK]	Arguments are preserved across syscall
 [OK]	R8..R15 did not leak kernel data

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1462201010-16846-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/entry/entry_64_compat.S | 45 +++++++++++++++-----------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d03bf0e28b8b..e479ff847023 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -79,24 +79,23 @@ ENTRY(entry_SYSENTER_compat)
 	ASM_CLAC			/* Clear AC after saving FLAGS */
 
 	pushq	$__USER32_CS		/* pt_regs->cs */
-	xorq    %r8,%r8
-	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
+	pushq	$0			/* pt_regs->ip = 0 (placeholder) */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-	pushq   %r8                     /* pt_regs->r12 = 0 */
-	pushq   %r8                     /* pt_regs->r13 = 0 */
-	pushq   %r8                     /* pt_regs->r14 = 0 */
-	pushq   %r8                     /* pt_regs->r15 = 0 */
+	pushq   $0			/* pt_regs->r12 = 0 */
+	pushq   $0			/* pt_regs->r13 = 0 */
+	pushq   $0			/* pt_regs->r14 = 0 */
+	pushq   $0			/* pt_regs->r15 = 0 */
 	cld
 
 	/*
@@ -185,17 +184,16 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	xorq    %r8,%r8
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-	pushq   %r8                     /* pt_regs->r12 = 0 */
-	pushq   %r8                     /* pt_regs->r13 = 0 */
-	pushq   %r8                     /* pt_regs->r14 = 0 */
-	pushq   %r8                     /* pt_regs->r15 = 0 */
+	pushq   $0			/* pt_regs->r12 = 0 */
+	pushq   $0			/* pt_regs->r13 = 0 */
+	pushq   $0			/* pt_regs->r14 = 0 */
+	pushq   $0			/* pt_regs->r15 = 0 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -292,11 +290,10 @@ ENTRY(entry_INT80_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	xorq    %r8,%r8
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp */
 	pushq   %r12                    /* pt_regs->r12 */

From 5c91dde1312e8d9cc2b7e6a60cdc22debc711c8e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 14 Jul 2018 02:30:55 -0700
Subject: [PATCH 030/519] x86/entry/64/compat: Clear registers for compat
 syscalls, to reduce speculation attack surface

commit 6b8cf5cc9965673951f1ab3f0e3cf23d06e3e2ee upstream.

At entry userspace may have populated registers with values that could
otherwise be useful in a speculative execution attack. Clear them to
minimize the kernel's attack surface.

Originally-From: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/151787989697.7847.4083702787288600552.stgit@dwillia2-desk3.amr.corp.intel.com
[ Made small improvements to the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/entry/entry_64_compat.S | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e479ff847023..48c27c3fdfdb 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -87,15 +87,25 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
+	xorq	%r8, %r8		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
+	xorq	%r9, %r9		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
+	xorq	%r10, %r10		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
+	xorq	%r11, %r11		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
+	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   $0			/* pt_regs->r12 = 0 */
+	xorq	%r12, %r12		/* nospec   r12 */
 	pushq   $0			/* pt_regs->r13 = 0 */
+	xorq	%r13, %r13		/* nospec   r13 */
 	pushq   $0			/* pt_regs->r14 = 0 */
+	xorq	%r14, %r14		/* nospec   r14 */
 	pushq   $0			/* pt_regs->r15 = 0 */
+	xorq	%r15, %r15		/* nospec   r15 */
 	cld
 
 	/*
@@ -185,15 +195,25 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
+	xorq	%r8, %r8		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
+	xorq	%r9, %r9		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
+	xorq	%r10, %r10		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
+	xorq	%r11, %r11		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
+	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   $0			/* pt_regs->r12 = 0 */
+	xorq	%r12, %r12		/* nospec   r12 */
 	pushq   $0			/* pt_regs->r13 = 0 */
+	xorq	%r13, %r13		/* nospec   r13 */
 	pushq   $0			/* pt_regs->r14 = 0 */
+	xorq	%r14, %r14		/* nospec   r14 */
 	pushq   $0			/* pt_regs->r15 = 0 */
+	xorq	%r15, %r15		/* nospec   r15 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -291,15 +311,25 @@ ENTRY(entry_INT80_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
+	xorq	%r8, %r8		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
+	xorq	%r9, %r9		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
+	xorq	%r10, %r10		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
+	xorq	%r11, %r11		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
+	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp */
+	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   %r12                    /* pt_regs->r12 */
+	xorq	%r12, %r12		/* nospec   r12 */
 	pushq   %r13                    /* pt_regs->r13 */
+	xorq	%r13, %r13		/* nospec   r13 */
 	pushq   %r14                    /* pt_regs->r14 */
+	xorq	%r14, %r14		/* nospec   r14 */
 	pushq   %r15                    /* pt_regs->r15 */
+	xorq	%r15, %r15		/* nospec   r15 */
 	cld
 
 	/*

From b7c492fb9e33857cf983c7807929f1410655765c Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:31:04 -0700
Subject: [PATCH 031/519] x86/speculation: Update Speculation Control microcode
 blacklist

commit 1751342095f0d2b36fa8114d8e12c5688c455ac4 upstream.

Intel have retroactively blessed the 0xc2 microcode on Skylake mobile
and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine
too. We blacklisted the latter purely because it was present with all
the other problematic ones in the 2018-01-08 release, but now it's
explicitly listed as OK.

We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as
that appeared in one version of the blacklist and then reverted to
0x80 again. We can change it if 0x84 is actually announced to be safe.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arjan.van.de.ven@intel.com
Cc: jmattson@google.com
Cc: karahmed@amazon.de
Cc: kvm@vger.kernel.org
Cc: pbonzini@redhat.com
Cc: rkrcmar@redhat.com
Cc: sironi@amazon.de
Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0f1318901777..71492d29456f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,8 +47,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
 	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x84 },
 	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
 	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
-	{ INTEL_FAM6_SKYLAKE_MOBILE,	0x03,	0xc2 },
-	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },
 	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },
 	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },
 	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 },
@@ -60,8 +58,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
 	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },
 	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },
 	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a },
-	/* Updated in the 20180108 release; blacklist until we know otherwise */
-	{ INTEL_FAM6_ATOM_GEMINI_LAKE,	0x01,	0x22 },
 	/* Observed in the wild */
 	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },
 	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 },

From 307261be84cca663b9497a68c2fbc8bc1061f494 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:31:13 -0700
Subject: [PATCH 032/519] x86/speculation: Correct Speculation Control
 microcode blacklist again

commit d37fc6d360a404b208547ba112e7dabb6533c7fc upstream.

Arjan points out that the Intel document only clears the 0xc2 microcode
on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3).
For the Skylake H/S platform it's OK but for Skylake E3 which has the
same CPUID it isn't (yet) cleared.

So removing it from the blacklist was premature. Put it back for now.

Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was
featured in one of the early revisions of the Intel document was never
released to the public, and won't be until/unless it is also validated
as safe. So those can change to 0x80 which is what all *other* versions
of the doc have identified.

Once the retrospective testing of existing public microcodes is done, we
should be back into a mode where new microcodes are only released in
batches and we shouldn't even need to update the blacklist for those
anyway, so this tweaking of the list isn't expected to be a thing which
keeps happening.

Requested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arjan.van.de.ven@intel.com
Cc: dave.hansen@intel.com
Cc: kvm@vger.kernel.org
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 71492d29456f..b69d258f9aae 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -40,13 +40,14 @@ struct sku_microcode {
 	u32 microcode;
 };
 static const struct sku_microcode spectre_bad_microcodes[] = {
-	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x84 },
-	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x84 },
-	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x84 },
-	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x84 },
-	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x84 },
+	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x80 },
+	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x80 },
+	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x80 },
+	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x80 },
+	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x80 },
 	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
 	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
+	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },
 	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },
 	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },
 	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 },

From d10b55dd5a1612a76e58bb80c7b0ec92672c0e5b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 14 Jul 2018 02:31:23 -0700
Subject: [PATCH 033/519] x86/speculation: Clean up various Spectre related
 details

commit 21e433bdb95bdf3aa48226fd3d33af608437f293 upstream.

Harmonize all the Spectre messages so that a:

    dmesg | grep -i spectre

... gives us most Spectre related kernel boot messages.

Also fix a few other details:

 - clarify a comment about firmware speculation control

 - s/KPTI/PTI

 - remove various line-breaks that made the code uglier

Acked-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1968baf66c48..fea368ddcd85 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
 		return SPECTRE_V2_CMD_NONE;
 	else {
-		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
-					  sizeof(arg));
+		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
 		if (ret < 0)
 			return SPECTRE_V2_CMD_AUTO;
 
@@ -184,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
 	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
 	    !IS_ENABLED(CONFIG_RETPOLINE)) {
-		pr_err("%s selected but not compiled in. Switching to AUTO select\n",
-		       mitigation_options[i].option);
+		pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
 		return SPECTRE_V2_CMD_AUTO;
 	}
 
@@ -255,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
 			goto retpoline_auto;
 		break;
 	}
-	pr_err("kernel not compiled with retpoline; no mitigation available!");
+	pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
 	return;
 
 retpoline_auto:
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 	retpoline_amd:
 		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
-			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
 			goto retpoline_generic;
 		}
 		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@@ -280,7 +278,7 @@ static void __init spectre_v2_select_mitigation(void)
 	pr_info("%s\n", spectre_v2_strings[mode]);
 
 	/*
-	 * If neither SMEP or KPTI are available, there is a risk of
+	 * If neither SMEP nor PTI are available, there is a risk of
 	 * hitting userspace addresses in the RSB after a context switch
 	 * from a shallow call stack to a deeper one. To prevent this fill
 	 * the entire RSB, even when using IBRS.
@@ -294,21 +292,20 @@ static void __init spectre_v2_select_mitigation(void)
 	if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
 	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
 		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-		pr_info("Filling RSB on context switch\n");
+		pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
 	}
 
 	/* Initialize Indirect Branch Prediction Barrier if supported */
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
 		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
-		pr_info("Enabling Indirect Branch Prediction Barrier\n");
+		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
 	}
 }
 
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev,
-			  struct device_attribute *attr, char *buf)
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
 		return sprintf(buf, "Not affected\n");
@@ -317,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev,
 	return sprintf(buf, "Vulnerable\n");
 }
 
-ssize_t cpu_show_spectre_v1(struct device *dev,
-			    struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
 		return sprintf(buf, "Not affected\n");
 	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 }
 
-ssize_t cpu_show_spectre_v2(struct device *dev,
-			    struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
 		return sprintf(buf, "Not affected\n");

From 1af5c9661555bff49d50d38b5723a11ad85fbc97 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 14 Jul 2018 02:31:32 -0700
Subject: [PATCH 034/519] x86/speculation: Fix up array_index_nospec_mask() asm
 constraint

commit be3233fbfcb8f5acb6e3bcd0895c3ef9e100d470 upstream.

Allow the compiler to handle @size as an immediate value or memory
directly rather than allocating a register.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/151797010204.1289.1510000292250184993.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/barrier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e3a6f66d288c..7f5dcb64cedb 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 
 	asm volatile ("cmp %1,%2; sbb %0,%0;"
 			:"=r" (mask)
-			:"r"(size),"r" (index)
+			:"g"(size),"r" (index)
 			:"cc");
 	return mask;
 }

From 4b9593083546b76299b28f0abb76505b4988860f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 14 Jul 2018 02:31:40 -0700
Subject: [PATCH 035/519] x86/speculation: Add <asm/msr-index.h> dependency

commit ea00f301285ea2f07393678cd2b6057878320c9d upstream.

Joe Konno reported a compile failure resulting from using an MSR
without inclusion of <asm/msr-index.h>, and while the current code builds
fine (by accident) this needs fixing for future patches.

Reported-by: Joe Konno <joe.konno@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arjan@linux.intel.com
Cc: bp@alien8.de
Cc: dan.j.williams@intel.com
Cc: dave.hansen@linux.intel.com
Cc: dwmw2@infradead.org
Cc: dwmw@amazon.co.uk
Cc: gregkh@linuxfoundation.org
Cc: hpa@zytor.com
Cc: jpoimboe@redhat.com
Cc: linux-tip-commits@vger.kernel.org
Cc: luto@kernel.org
Fixes: 20ffa1caecca ("x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support")
Link: http://lkml.kernel.org/r/20180213132819.GJ25201@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 8dcecb912365..bca286042e4b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -6,6 +6,7 @@
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 
 /*
  * Fill the CPU return stack buffer.

From 7c55236675b8426f861a6e63b75ec1e17057c8a0 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Sat, 14 Jul 2018 02:31:49 -0700
Subject: [PATCH 036/519] x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend

commit 71c208dd54ab971036d83ff6d9837bae4976e623 upstream.

Older Xen versions (4.5 and before) might have problems migrating pv
guests with MSR_IA32_SPEC_CTRL having a non-zero value. So before
suspending zero that MSR and restore it after being resumed.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Cc: stable@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
Cc: boris.ostrovsky@oracle.com
Link: https://lkml.kernel.org/r/20180226140818.4849-1-jgross@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/suspend.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 7f664c416faf..4ecd0de08557 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,11 +1,14 @@
 #include <linux/types.h>
 #include <linux/tick.h>
+#include <linux/percpu-defs.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
 #include <xen/events.h>
 
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 #include <asm/fixmap.h>
@@ -68,6 +71,8 @@ static void xen_pv_post_suspend(int suspend_cancelled)
 	xen_mm_unpin_all();
 }
 
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
 void xen_arch_pre_suspend(void)
 {
 	if (xen_pv_domain())
@@ -84,6 +89,9 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
+	if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+		wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
 	/* Boot processor notified via generic timekeeping_resume() */
 	if (smp_processor_id() == 0)
 		return;
@@ -93,7 +101,15 @@ static void xen_vcpu_notify_restore(void *data)
 
 static void xen_vcpu_notify_suspend(void *data)
 {
+	u64 tmp;
+
 	tick_suspend_local();
+
+	if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+		rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+		this_cpu_write(spec_ctrl, tmp);
+		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+	}
 }
 
 void xen_arch_resume(void)

From 58ac8c59dbb3a8e8b6414524c2d8f4f0a7bbeaa4 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Sat, 14 Jul 2018 02:31:57 -0700
Subject: [PATCH 037/519] x86/mm: Factor out LDT init from context init

commit 39a0526fb3f7d93433d146304278477eb463f8af upstream

The arch-specific mm_context_t is a great place to put
protection-key allocation state.

But, we need to initialize the allocation state because pkey 0 is
always "allocated".  All of the runtime initialization of
mm_context_t is done in *_ldt() manipulation functions.  This
renames the existing LDT functions like this:

	init_new_context() -> init_new_context_ldt()
	destroy_context() -> destroy_context_ldt()

and makes init_new_context() and destroy_context() available for
generic use.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20160212210234.DB34FCC5@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/mmu_context.h | 21 ++++++++++++++++-----
 arch/x86/kernel/ldt.c              |  4 ++--
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 9bfc5fd77015..1c4794f861d7 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -52,15 +52,15 @@ struct ldt_struct {
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void destroy_context(struct mm_struct *mm);
+int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context_ldt(struct mm_struct *mm);
 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context(struct task_struct *tsk,
-				   struct mm_struct *mm)
+static inline int init_new_context_ldt(struct task_struct *tsk,
+				       struct mm_struct *mm)
 {
 	return 0;
 }
-static inline void destroy_context(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) {}
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -102,6 +102,17 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 }
 
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	init_new_context_ldt(tsk, mm);
+	return 0;
+}
+static inline void destroy_context(struct mm_struct *mm)
+{
+	destroy_context_ldt(mm);
+}
+
 extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		      struct task_struct *tsk);
 
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index bc429365b72a..8bc68cfc0d33 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -119,7 +119,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
  * we do not have to muck with descriptors here, that is
  * done in switch_mm() as needed.
  */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
 {
 	struct ldt_struct *new_ldt;
 	struct mm_struct *old_mm;
@@ -160,7 +160,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  *
  * 64bit: Don't touch the LDT register - we're already in the next thread.
  */
-void destroy_context(struct mm_struct *mm)
+void destroy_context_ldt(struct mm_struct *mm)
 {
 	free_ldt_struct(mm->context.ldt);
 	mm->context.ldt = NULL;

From 937dad078f557ddd6151e62f7f0028c136ffba4a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sat, 14 Jul 2018 02:32:07 -0700
Subject: [PATCH 038/519] x86/mm: Give each mm TLB flush generation a unique ID

commit f39681ed0f48498b80455095376f11535feea332 upstream.

This adds two new variables to mmu_context_t: ctx_id and tlb_gen.
ctx_id uniquely identifies the mm_struct and will never be reused.
For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic
count of the number of times that a TLB flush has been requested.
The pair (ctx_id, tlb_gen) can be used as an identifier for TLB
flush actions and will be used in subsequent patches to reliably
determine whether all needed TLB flushes have occurred on a given
CPU.

This patch is split out for ease of review.  By itself, it has no
real effect other than creating and updating the new variables.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/mmu.h         | 15 +++++++++++++--
 arch/x86/include/asm/mmu_context.h |  4 ++++
 arch/x86/mm/tlb.c                  |  2 ++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 7680b76adafc..3359dfedc7ee 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,12 +3,18 @@
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/atomic.h>
 
 /*
- * The x86 doesn't have a mmu context, but
- * we put the segment information here.
+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
  */
 typedef struct {
+	/*
+	 * ctx_id uniquely identifies this mm_struct.  A ctx_id will never
+	 * be reused, and zero is not a valid ctx_id.
+	 */
+	u64 ctx_id;
+
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 	struct ldt_struct *ldt;
 #endif
@@ -24,6 +30,11 @@ typedef struct {
 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;
 
+#define INIT_MM_CONTEXT(mm)						\
+	.context = {							\
+		.ctx_id = 1,						\
+	}
+
 void leave_mm(int cpu);
 
 #endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 1c4794f861d7..effc12767cbf 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -11,6 +11,9 @@
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/mpx.h>
+
+extern atomic64_t last_mm_ctx_id;
+
 #ifndef CONFIG_PARAVIRT
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 					struct mm_struct *next)
@@ -105,6 +108,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	init_new_context_ldt(tsk, mm);
 	return 0;
 }
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7cad01af6dcd..efec198d271a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -29,6 +29,8 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+
 struct flush_tlb_info {
 	struct mm_struct *flush_mm;
 	unsigned long flush_start;

From 2997b0617b252f6e8630c1aa410697e2b0ed3b0d Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sat, 14 Jul 2018 02:32:16 -0700
Subject: [PATCH 039/519] x86/speculation: Use Indirect Branch Prediction
 Barrier in context switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 18bf3c3ea8ece8f03b6fc58508f2dfd23c7711c7 upstream.

Flush indirect branches when switching into a process that marked itself
non dumpable. This protects high value processes like gpg better,
without having too high performance overhead.

If done naïvely, we could switch to a kernel idle thread and then back
to the original process, such as:

    process A -> idle -> process A

In such scenario, we do not have to do IBPB here even though the process
is non-dumpable, as we are switching back to the same process after a
hiatus.

To avoid the redundant IBPB, which is expensive, we track the last mm
user context ID. The cost is to have an extra u64 mm context id to track
the last mm we were using before switching to the init_mm used by idle.
Avoiding the extra IBPB is probably worth the extra memory for this
common scenario.

For those cases where tlb_defer_switch_to_init_mm() returns true (non
PCID), lazy tlb will defer switch to init_mm, so we will not be changing
the mm for the process A -> idle -> process A switch. So IBPB will be
skipped for this case.

Thanks to the reviewers and Andy Lutomirski for the suggestion of
using ctx_id which got rid of the problem of mm pointer recycling.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ak@linux.intel.com
Cc: karahmed@amazon.de
Cc: arjan@linux.intel.com
Cc: torvalds@linux-foundation.org
Cc: linux@dominikbrodowski.net
Cc: peterz@infradead.org
Cc: bp@alien8.de
Cc: luto@kernel.org
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/1517263487-3708-1-git-send-email-dwmw@amazon.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/tlbflush.h |  2 ++
 arch/x86/mm/tlb.c               | 31 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e2a89d2577fb..8ce07db77299 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void)
 struct tlb_state {
 	struct mm_struct *active_mm;
 	int state;
+	/* last user mm's ctx id */
+	u64 last_ctx_id;
 
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index efec198d271a..6d683bbb3502 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -10,6 +10,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
 #include <asm/cache.h>
 #include <asm/apic.h>
 #include <asm/uv/uv.h>
@@ -106,6 +107,36 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	unsigned cpu = smp_processor_id();
 
 	if (likely(prev != next)) {
+		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
+
+		/*
+		 * Avoid user/user BTB poisoning by flushing the branch
+		 * predictor when switching between processes. This stops
+		 * one process from doing Spectre-v2 attacks on another.
+		 *
+		 * As an optimization, flush indirect branches only when
+		 * switching into processes that disable dumping. This
+		 * protects high value processes like gpg, without having
+		 * too high performance overhead. IBPB is *expensive*!
+		 *
+		 * This will not flush branches when switching into kernel
+		 * threads. It will also not flush if we switch to idle
+		 * thread and back to the same process. It will flush if we
+		 * switch to a different non-dumpable process.
+		 */
+		if (tsk && tsk->mm &&
+		    tsk->mm->context.ctx_id != last_ctx_id &&
+		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
+			indirect_branch_prediction_barrier();
+
+		/*
+		 * Record last user mm's context id, so we can avoid
+		 * flushing branch buffer with IBPB if we switch back
+		 * to the same user.
+		 */
+		if (next != &init_mm)
+			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		this_cpu_write(cpu_tlbstate.active_mm, next);
 		cpumask_set_cpu(cpu, mm_cpumask(next));

From 56c4a02fe0fc9fbdeee19eaef11c5b67c8bef371 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:32:25 -0700
Subject: [PATCH 040/519] x86/spectre_v2: Don't check microcode versions when
 running under hypervisors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 36268223c1e9981d6cfc33aff8520b3bde4b8114 upstream.

As:

 1) It's known that hypervisors lie about the environment anyhow (host
    mismatch)

 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid
    "correct" value, it all gets to be very murky when migration happens
    (do you provide the "new" microcode of the machine?).

And in reality the cloud vendors are the ones that should make sure that
the microcode that is running is correct and we should just sing lalalala
and trust them.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Cc: kvm <kvm@vger.kernel.org>
Cc: Krčmář <rkrcmar@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b69d258f9aae..dcc03498cf10 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -68,6 +68,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 {
 	int i;
 
+	/*
+	 * We know that the hypervisor lie to us on the microcode version so
+	 * we may as well hope that it is running the correct version.
+	 */
+	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+		return false;
+
 	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
 		if (c->x86_model == spectre_bad_microcodes[i].model &&
 		    c->x86_mask == spectre_bad_microcodes[i].stepping)

From 7ec391255421d5d311c66d6fbfb33cdfca789b9f Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:32:33 -0700
Subject: [PATCH 041/519] x86/speculation: Use IBRS if available before calling
 into firmware

commit dd84441a797150dcc49298ec95c459a8891d8bb1 upstream.

Retpoline means the kernel is safe because it has no indirect branches.
But firmware isn't, so use IBRS for firmware calls if it's available.

Block preemption while IBRS is set, although in practice the call sites
already had to be doing that.

Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware
code, from an NMI handler. I don't want to touch that with a bargepole.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: arjan.van.de.ven@intel.com
Cc: bp@alien8.de
Cc: dave.hansen@intel.com
Cc: jmattson@google.com
Cc: karahmed@amazon.de
Cc: kvm@vger.kernel.org
Cc: pbonzini@redhat.com
Cc: rkrcmar@redhat.com
Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[ Srivatsa: Backported to 4.4.y, patching the efi_call_virt() family of functions,
  which are the 4.4.y-equivalents of arch_efi_call_virt_setup()/teardown() ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/apm.h           |  6 +++++
 arch/x86/include/asm/cpufeatures.h   |  1 +
 arch/x86/include/asm/efi.h           |  7 +++++
 arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++-------
 arch/x86/kernel/cpu/bugs.c           | 12 ++++++++-
 arch/x86/platform/efi/efi_64.c       |  3 +++
 6 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 20370c6db74b..3d1ec41ae09a 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -6,6 +6,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_APM_H
 #define _ASM_X86_MACH_DEFAULT_APM_H
 
+#include <asm/nospec-branch.h>
+
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
 		"pushl %%ds\n\t" \
@@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
+	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 		  "=S" (*esi)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
+	firmware_restrict_branch_speculation_end();
 }
 
 static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -55,6 +59,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
+	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -67,6 +72,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 		  "=S" (si)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
+	firmware_restrict_branch_speculation_end();
 	return error;
 }
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 782005d7eb48..bc76bf39bb2f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -202,6 +202,7 @@
 #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
 
 #define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
+#define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0010c78c4998..7e5a2ffb6938 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,7 @@
 
 #include <asm/fpu/api.h>
 #include <asm/pgtable.h>
+#include <asm/nospec-branch.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
@@ -39,8 +40,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 ({									\
 	efi_status_t __s;						\
 	kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
 	__s = ((efi_##f##_t __attribute__((regparm(0)))*)		\
 		efi.systab->runtime->f)(args);				\
+	firmware_restrict_branch_speculation_end();			\
 	kernel_fpu_end();						\
 	__s;								\
 })
@@ -49,8 +52,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define __efi_call_virt(f, args...) \
 ({									\
 	kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
 	((efi_##f##_t __attribute__((regparm(0)))*)			\
 		efi.systab->runtime->f)(args);				\
+	firmware_restrict_branch_speculation_end();			\
 	kernel_fpu_end();						\
 })
 
@@ -71,7 +76,9 @@ extern u64 asmlinkage efi_call(void *fp, ...);
 	efi_sync_low_kernel_mappings();					\
 	preempt_disable();						\
 	__kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
 	__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);	\
+	firmware_restrict_branch_speculation_end();			\
 	__kernel_fpu_end();						\
 	preempt_enable();						\
 	__s;								\
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index bca286042e4b..36ded24ca381 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -195,17 +195,38 @@ static inline void vmexit_fill_RSB(void)
 #endif
 }
 
+#define alternative_msr_write(_msr, _val, _feature)		\
+	asm volatile(ALTERNATIVE("",				\
+				 "movl %[msr], %%ecx\n\t"	\
+				 "movl %[val], %%eax\n\t"	\
+				 "movl $0, %%edx\n\t"		\
+				 "wrmsr",			\
+				 _feature)			\
+		     : : [msr] "i" (_msr), [val] "i" (_val)	\
+		     : "eax", "ecx", "edx", "memory")
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-	asm volatile(ALTERNATIVE("",
-				 "movl %[msr], %%ecx\n\t"
-				 "movl %[val], %%eax\n\t"
-				 "movl $0, %%edx\n\t"
-				 "wrmsr",
-				 X86_FEATURE_USE_IBPB)
-		     : : [msr] "i" (MSR_IA32_PRED_CMD),
-			 [val] "i" (PRED_CMD_IBPB)
-		     : "eax", "ecx", "edx", "memory");
+	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+			      X86_FEATURE_USE_IBPB);
+}
+
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ */
+static inline void firmware_restrict_branch_speculation_start(void)
+{
+	preempt_disable();
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,
+			      X86_FEATURE_USE_IBRS_FW);
+}
+
+static inline void firmware_restrict_branch_speculation_end(void)
+{
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,
+			      X86_FEATURE_USE_IBRS_FW);
+	preempt_enable();
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index fea368ddcd85..b294fdc0faf2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void)
 		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
 		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
 	}
+
+	/*
+	 * Retpoline means the kernel is safe because it has no indirect
+	 * branches. But firmware isn't, so use IBRS to protect that.
+	 */
+	if (boot_cpu_has(X86_FEATURE_IBRS)) {
+		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+		pr_info("Enabling Restricted Speculation for firmware calls\n");
+	}
 }
 
 #undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
 		return sprintf(buf, "Not affected\n");
 
-	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+	return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
 		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
 		       spectre_v2_module_string());
 }
 #endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index a0ac0f9c307f..f5a8cd96bae4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -40,6 +40,7 @@
 #include <asm/fixmap.h>
 #include <asm/realmode.h>
 #include <asm/time.h>
+#include <asm/nospec-branch.h>
 
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
@@ -347,6 +348,7 @@ extern efi_status_t efi64_thunk(u32, ...);
 									\
 	efi_sync_low_kernel_mappings();					\
 	local_irq_save(flags);						\
+	firmware_restrict_branch_speculation_start();			\
 									\
 	efi_scratch.prev_cr3 = read_cr3();				\
 	write_cr3((unsigned long)efi_scratch.efi_pgt);			\
@@ -357,6 +359,7 @@ extern efi_status_t efi64_thunk(u32, ...);
 									\
 	write_cr3(efi_scratch.prev_cr3);				\
 	__flush_tlb_all();						\
+	firmware_restrict_branch_speculation_end();			\
 	local_irq_restore(flags);					\
 									\
 	__s;								\

From bdf186811576fdec0a42b554b884ed8ae2df54a2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 14 Jul 2018 02:32:43 -0700
Subject: [PATCH 042/519] x86/speculation: Move
 firmware_restrict_branch_speculation_*() from C to CPP

commit d72f4e29e6d84b7ec02ae93088aa459ac70e733b upstream.

firmware_restrict_branch_speculation_*() recently started using
preempt_enable()/disable(), but those are relatively high level
primitives and cause build failures on some 32-bit builds.

Since we want to keep <asm/nospec-branch.h> low level, convert
them to macros to avoid header hell...

Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: arjan.van.de.ven@intel.com
Cc: bp@alien8.de
Cc: dave.hansen@intel.com
Cc: jmattson@google.com
Cc: karahmed@amazon.de
Cc: kvm@vger.kernel.org
Cc: pbonzini@redhat.com
Cc: rkrcmar@redhat.com
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 36ded24ca381..b9dd1d9ef8af 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -214,20 +214,22 @@ static inline void indirect_branch_prediction_barrier(void)
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
  * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
  */
-static inline void firmware_restrict_branch_speculation_start(void)
-{
-	preempt_disable();
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,
-			      X86_FEATURE_USE_IBRS_FW);
-}
+#define firmware_restrict_branch_speculation_start()			\
+do {									\
+	preempt_disable();						\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,	\
+			      X86_FEATURE_USE_IBRS_FW);			\
+} while (0)
 
-static inline void firmware_restrict_branch_speculation_end(void)
-{
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,
-			      X86_FEATURE_USE_IBRS_FW);
-	preempt_enable();
-}
+#define firmware_restrict_branch_speculation_end()			\
+do {									\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,			\
+			      X86_FEATURE_USE_IBRS_FW);			\
+	preempt_enable();						\
+} while (0)
 
 #endif /* __ASSEMBLY__ */
 

From 0990b1ff53bec71e4fdce971b6c4c5e1b7b91d01 Mon Sep 17 00:00:00 2001
From: Alexander Sergeyev <sergeev917@gmail.com>
Date: Sat, 14 Jul 2018 02:32:52 -0700
Subject: [PATCH 043/519] x86/speculation: Remove Skylake C2 from Speculation
 Control microcode blacklist

commit e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 upstream.

In accordance with Intel's microcode revision guidance from March 6 MCU
rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors
that share CPUID 506E3.

Signed-off-by: Alexander Sergeyev <sergeev917@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jia Zhang <qianyue.zj@alibaba-inc.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Kyle Huey <me@kylehuey.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index dcc03498cf10..77d9f6809b05 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,7 +29,7 @@
 /*
  * Early microcode releases for the Spectre v2 mitigation were broken.
  * Information taken from;
- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
  * - https://kb.vmware.com/s/article/52345
  * - Microcode revisions observed in the wild
  * - Release note from 20180108 microcode release
@@ -47,7 +47,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
 	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x80 },
 	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
 	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
-	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },
 	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },
 	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },
 	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 },

From b4553a4ec2f64b818193ba85434b1f42c68d5aff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Sat, 14 Jul 2018 02:33:00 -0700
Subject: [PATCH 044/519] selftest/seccomp: Fix the flag name
 SECCOMP_FILTER_FLAG_TSYNC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 6c045d07bb305c527140bdec4cf8ab50f7c980d8 upstream

Rename SECCOMP_FLAG_FILTER_TSYNC to SECCOMP_FILTER_FLAG_TSYNC to match
the UAPI.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Will Drewry <wad@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 882fe83a3554..d446346d9146 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1476,8 +1476,8 @@ TEST_F(TRACE_syscall, syscall_dropped)
 #define SECCOMP_SET_MODE_FILTER 1
 #endif
 
-#ifndef SECCOMP_FLAG_FILTER_TSYNC
-#define SECCOMP_FLAG_FILTER_TSYNC 1
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
 
 #ifndef seccomp
@@ -1592,7 +1592,7 @@ TEST(TSYNC_first)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &prog);
 	ASSERT_NE(ENOSYS, errno) {
 		TH_LOG("Kernel does not support seccomp syscall!");
@@ -1810,7 +1810,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor)
 		self->sibling_count++;
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Could install filter on all threads!");
@@ -1871,7 +1871,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_NE(ENOSYS, errno) {
 		TH_LOG("Kernel does not support seccomp syscall!");
@@ -1919,7 +1919,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
 		self->sibling_count++;
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
 		TH_LOG("Did not fail on diverged sibling.");
@@ -1971,7 +1971,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
 		TH_LOG("Did not fail on diverged sibling.");
@@ -2000,7 +2000,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
 	/* Switch to the remaining sibling */
 	sib = !sib;
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Expected the remaining sibling to sync");
@@ -2023,7 +2023,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
 	while (!kill(self->sibling[sib].system_tid, 0))
 		sleep(0.1);
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret);  /* just us chickens */
 }

From 9f62897343da5fbbbd0db7441750ecc4c833a6ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Sat, 14 Jul 2018 02:33:08 -0700
Subject: [PATCH 045/519] selftest/seccomp: Fix the seccomp(2) signature
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 505ce68c6da3432454c62e43c24a22ea5b1d754b upstream

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Will Drewry <wad@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index d446346d9146..29487e0437ad 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1481,10 +1481,10 @@ TEST_F(TRACE_syscall, syscall_dropped)
 #endif
 
 #ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+int seccomp(unsigned int op, unsigned int flags, void *args)
 {
 	errno = 0;
-	return syscall(__NR_seccomp, op, flags, filter);
+	return syscall(__NR_seccomp, op, flags, args);
 }
 #endif
 

From 237a1870da36fcd10f67503928c485d964726d83 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Sat, 14 Jul 2018 02:33:16 -0700
Subject: [PATCH 046/519] xen: set cpu capabilities from xen_start_kernel()

Upstream commit: 0808e80cb760de2733c0527d2090ed2205a1eef8 ("xen: set
cpu capabilities from xen_start_kernel()")

There is no need to set the same capabilities for each cpu
individually. This can easily be done for all cpus when starting the
kernel.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/enlighten.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index cbef64b508e1..2d7ab4e23e9e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -460,6 +460,14 @@ static void __init xen_init_cpuid_mask(void)
 		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
 }
 
+static void __init xen_init_capabilities(void)
+{
+	if (xen_pv_domain()) {
+		setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS);
+		setup_force_cpu_cap(X86_FEATURE_XENPV);
+	}
+}
+
 static void xen_set_debugreg(int reg, unsigned long val)
 {
 	HYPERVISOR_set_debugreg(reg, val);
@@ -1587,6 +1595,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
+	xen_init_capabilities();
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
@@ -1883,14 +1892,6 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-static void xen_set_cpu_features(struct cpuinfo_x86 *c)
-{
-	if (xen_pv_domain()) {
-		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
-		set_cpu_cap(c, X86_FEATURE_XENPV);
-	}
-}
-
 const struct hypervisor_x86 x86_hyper_xen = {
 	.name			= "Xen",
 	.detect			= xen_platform,
@@ -1898,7 +1899,6 @@ const struct hypervisor_x86 x86_hyper_xen = {
 	.init_platform		= xen_hvm_guest_init,
 #endif
 	.x2apic_available	= xen_x2apic_para_available,
-	.set_cpu_features       = xen_set_cpu_features,
 };
 EXPORT_SYMBOL(x86_hyper_xen);
 

From e57a81c43ca5ea32bf28c4634e903e7a4a1cbbd3 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:33:24 -0700
Subject: [PATCH 047/519] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when
 running under Xen

commit def9331a12977770cc6132d79f8e6565871e8e38 upstream

When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set
on AMD cpus.

This bug/feature bit is kind of special as it will be used very early
when switching threads. Setting the bit and clearing it a little bit
later leaves a critical window where things can go wrong. This time
window has enlarged a little bit by using setup_clear_cpu_cap() instead
of the hypervisor's set_cpu_features callback. It seems this larger
window now makes it rather easy to hit the problem.

The proper solution is to never set the bit in case of Xen.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 5 +++--
 arch/x86/xen/enlighten.c  | 4 +---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4fb8f5b0be4..9b2941422b68 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -791,8 +791,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 
-	/* AMD CPUs don't reset SS attributes on SYSRET */
-	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
+	if (!cpu_has(c, X86_FEATURE_XENPV))
+		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2d7ab4e23e9e..82fd84d5e1aa 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -462,10 +462,8 @@ static void __init xen_init_cpuid_mask(void)
 
 static void __init xen_init_capabilities(void)
 {
-	if (xen_pv_domain()) {
-		setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS);
+	if (xen_pv_domain())
 		setup_force_cpu_cap(X86_FEATURE_XENPV);
-	}
 }
 
 static void xen_set_debugreg(int reg, unsigned long val)

From b2dab2dc776cea8e1f190523456b32b850506ce3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 14 Jul 2018 02:33:32 -0700
Subject: [PATCH 048/519] x86/nospec: Simplify alternative_msr_write()

commit 1aa7a5735a41418d8e01fa7c9565eb2657e2ea3f upstream

The macro is not type safe and I did look for why that "g" constraint for
the asm doesn't work: it's because the asm is more fundamentally wrong.

It does

        movl %[val], %%eax

but "val" isn't a 32-bit value, so then gcc will pass it in a register,
and generate code like

        movl %rsi, %eax

and gas will complain about a nonsensical 'mov' instruction (it's moving a
64-bit register to a 32-bit one).

Passing it through memory will just hide the real bug - gcc still thinks
the memory location is 64-bit, but the "movl" will only load the first 32
bits and it all happens to work because x86 is little-endian.

Convert it to a type safe inline function with a little trick which hands
the feature into the ALTERNATIVE macro.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index b9dd1d9ef8af..6403016d3445 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -195,15 +195,16 @@ static inline void vmexit_fill_RSB(void)
 #endif
 }
 
-#define alternative_msr_write(_msr, _val, _feature)		\
-	asm volatile(ALTERNATIVE("",				\
-				 "movl %[msr], %%ecx\n\t"	\
-				 "movl %[val], %%eax\n\t"	\
-				 "movl $0, %%edx\n\t"		\
-				 "wrmsr",			\
-				 _feature)			\
-		     : : [msr] "i" (_msr), [val] "i" (_val)	\
-		     : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+		: : "c" (msr),
+		    "a" (val),
+		    "d" (val >> 32),
+		    [feature] "i" (feature)
+		: "memory");
+}
 
 static inline void indirect_branch_prediction_barrier(void)
 {

From d77421663170a2d660fa63a50c664805d132e69d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:33:40 -0700
Subject: [PATCH 049/519] x86/bugs: Concentrate bug detection into a separate
 function

commit 4a28bfe3267b68e22c663ac26185aa16c9b879ef upstream

Combine the various logic which goes through all those
x86_cpu_id matching structures in one function.

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 48499b41351c..97558d1f9ef6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -835,21 +835,27 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
 	{}
 };
 
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
 
+	if (x86_match_cpu(cpu_no_speculation))
+		return;
+
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
 	if (x86_match_cpu(cpu_no_meltdown))
-		return false;
+		return;
 
 	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
 
 	/* Rogue Data Cache Load? No! */
 	if (ia32_cap & ARCH_CAP_RDCL_NO)
-		return false;
+		return;
 
-	return true;
+	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 }
 
 /*
@@ -898,12 +904,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
-	if (!x86_match_cpu(cpu_no_speculation)) {
-		if (cpu_vulnerable_to_meltdown(c))
-			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
-		setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
-	}
+	cpu_set_bug_bits(c);
 
 	fpu__init_system(c);
 

From 96df48c0c42c6816d5b2808ed9e18a428cbf9598 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:33:49 -0700
Subject: [PATCH 050/519] x86/bugs: Concentrate bug reporting into a separate
 function

commit d1059518b4789cabe34bb4b714d07e6089c82ca1 upstream

Those SysFS functions have a similar preamble, as such make common
code to handle them.

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 48 ++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b294fdc0faf2..75f3d4974102 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -314,30 +314,48 @@ static void __init spectre_v2_select_mitigation(void)
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
+
+ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+			char *buf, unsigned int bug)
+{
+	if (!boot_cpu_has_bug(bug))
+		return sprintf(buf, "Not affected\n");
+
+	switch (bug) {
+	case X86_BUG_CPU_MELTDOWN:
+		if (boot_cpu_has(X86_FEATURE_KAISER))
+			return sprintf(buf, "Mitigation: PTI\n");
+
+		break;
+
+	case X86_BUG_SPECTRE_V1:
+		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+	case X86_BUG_SPECTRE_V2:
+		return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+			       spectre_v2_module_string());
+
+	default:
+		break;
+	}
+
+	return sprintf(buf, "Vulnerable\n");
+}
+
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
-		return sprintf(buf, "Not affected\n");
-	if (boot_cpu_has(X86_FEATURE_KAISER))
-		return sprintf(buf, "Mitigation: PTI\n");
-	return sprintf(buf, "Vulnerable\n");
+	return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
 }
 
 ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
-		return sprintf(buf, "Not affected\n");
-	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
 }
 
 ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-		return sprintf(buf, "Not affected\n");
-
-	return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
-		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
-		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
-		       spectre_v2_module_string());
+	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
 }
 #endif

From 51f37b2f0248911465d8f84fb6f547be5316a261 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:33:57 -0700
Subject: [PATCH 051/519] x86/bugs: Read SPEC_CTRL MSR during boot and re-use
 reserved bits

commit 1b86883ccb8d5d9506529d42dbe1a5257cb30b18 upstream

The 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to all
the other bits as reserved. The Intel SDM glossary defines reserved as
implementation specific - aka unknown.

As such at bootup this must be taken it into account and proper masking for
the bits in use applied.

A copy of this document is available at
https://bugzilla.kernel.org/show_bug.cgi?id=199511

[ tglx: Made x86_spec_ctrl_base __ro_after_init ]
[ Srivatsa: Removed __ro_after_init for 4.4.y ]

Suggested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 24 ++++++++++++++++++++----
 arch/x86/kernel/cpu/bugs.c           | 27 +++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 6403016d3445..daec31829827 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -172,6 +172,17 @@ enum spectre_v2_mitigation {
 	SPECTRE_V2_IBRS,
 };
 
+/*
+ * The Intel specification for the SPEC_CTRL MSR requires that we
+ * preserve any already set reserved bits at boot time (e.g. for
+ * future additions that this kernel is not currently aware of).
+ * We then set any additional mitigation bits that we want
+ * ourselves and always use this as the base for SPEC_CTRL.
+ * We also use this when handling guest entry/exit as below.
+ */
+extern void x86_spec_ctrl_set(u64);
+extern u64 x86_spec_ctrl_get_default(void);
+
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
@@ -208,8 +219,9 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 
 static inline void indirect_branch_prediction_barrier(void)
 {
-	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
-			      X86_FEATURE_USE_IBPB);
+	u64 val = PRED_CMD_IBPB;
+
+	alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
 }
 
 /*
@@ -220,14 +232,18 @@ static inline void indirect_branch_prediction_barrier(void)
  */
 #define firmware_restrict_branch_speculation_start()			\
 do {									\
+	u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS;		\
+									\
 	preempt_disable();						\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,	\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
 			      X86_FEATURE_USE_IBRS_FW);			\
 } while (0)
 
 #define firmware_restrict_branch_speculation_end()			\
 do {									\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,			\
+	u64 val = x86_spec_ctrl_get_default();				\
+									\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
 			      X86_FEATURE_USE_IBRS_FW);			\
 	preempt_enable();						\
 } while (0)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 75f3d4974102..42c22042f863 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -27,6 +27,12 @@
 
 static void __init spectre_v2_select_mitigation(void);
 
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+static u64 x86_spec_ctrl_base;
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -36,6 +42,13 @@ void __init check_bugs(void)
 		print_cpu_info(&boot_cpu_data);
 	}
 
+	/*
+	 * Read the SPEC_CTRL MSR to account for reserved bits which may
+	 * have unknown values.
+	 */
+	if (boot_cpu_has(X86_FEATURE_IBRS))
+		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
 	/* Select the proper spectre mitigation before patching alternatives */
 	spectre_v2_select_mitigation();
 
@@ -94,6 +107,20 @@ static const char *spectre_v2_strings[] = {
 
 static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
 
+void x86_spec_ctrl_set(u64 val)
+{
+	if (val & ~SPEC_CTRL_IBRS)
+		WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
+	else
+		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set);
+
+u64 x86_spec_ctrl_get_default(void)
+{
+	return x86_spec_ctrl_base;
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
 
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;

From 3e1ec1698244de1b808ae0142dd653e5aded91d7 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:34:05 -0700
Subject: [PATCH 052/519] x86/bugs, KVM: Support the combination of guest and
 host IBRS

commit 5cf687548705412da47c9cec342fd952d71ed3d5 upstream

A guest may modify the SPEC_CTRL MSR from the value used by the
kernel. Since the kernel doesn't use IBRS, this means a value of zero is
what is needed in the host.

But the 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to
the other bits as reserved so the kernel should respect the boot time
SPEC_CTRL value and use that.

This allows to deal with future extensions to the SPEC_CTRL interface if
any at all.

Note: This uses wrmsrl() instead of native_wrmsl(). I does not make any
difference as paravirt will over-write the callq *0xfff.. with the wrmsrl
assembler code.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 10 ++++++++++
 arch/x86/kernel/cpu/bugs.c           | 18 ++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index daec31829827..11db69a965dc 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -183,6 +183,16 @@ enum spectre_v2_mitigation {
 extern void x86_spec_ctrl_set(u64);
 extern u64 x86_spec_ctrl_get_default(void);
 
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter.
+ */
+extern void x86_spec_ctrl_set_guest(u64);
+extern void x86_spec_ctrl_restore_host(u64);
+
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 42c22042f863..e71e28154e1a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -122,6 +122,24 @@ u64 x86_spec_ctrl_get_default(void)
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
 
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
+{
+	if (!boot_cpu_has(X86_FEATURE_IBRS))
+		return;
+	if (x86_spec_ctrl_base != guest_spec_ctrl)
+		wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
+
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
+{
+	if (!boot_cpu_has(X86_FEATURE_IBRS))
+		return;
+	if (x86_spec_ctrl_base != guest_spec_ctrl)
+		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 

From 498efb90b8ad36de4a51a2298887acbfc3cab616 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sat, 14 Jul 2018 02:34:13 -0700
Subject: [PATCH 053/519] x86/cpu: Rename Merrifield2 to Moorefield

commit f5fbf848303c8704d0e1a1e7cabd08fd0a49552f upstream

Merrifield2 is actually Moorefield.

Rename it accordingly and drop tail digit from Merrifield1.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160906184254.94440-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/intel-family.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 12fa187865c2..0b27c1ebd731 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -58,8 +58,8 @@
 #define INTEL_FAM6_ATOM_SILVERMONT1	0x37 /* BayTrail/BYT / Valleyview */
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
-#define INTEL_FAM6_ATOM_MERRIFIELD1	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MERRIFIELD2	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 #define INTEL_FAM6_ATOM_GEMINI_LAKE	0x7A

From 05f8bae8e6b68ee20a26ec6a0683b1bcc31aef28 Mon Sep 17 00:00:00 2001
From: Piotr Luc <piotr.luc@intel.com>
Date: Sat, 14 Jul 2018 02:34:22 -0700
Subject: [PATCH 054/519] x86/cpu/intel: Add Knights Mill to Intel family

commit 0047f59834e5947d45f34f5f12eb330d158f700b upstream

Add CPUID of Knights Mill (KNM) processor to Intel family list.

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161012180520.30976-1-piotr.luc@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/intel-family.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 0b27c1ebd731..e13ff5a14633 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -67,5 +67,6 @@
 /* Xeon Phi */
 
 #define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
+#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */
 
 #endif /* _ASM_X86_INTEL_FAMILY_H */

From d8067aba239cbd2bfd64cdd548a914b20c58d189 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:34:31 -0700
Subject: [PATCH 055/519] x86/bugs: Expose /sys/../spec_store_bypass

commit c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf upstream

Add the sysfs file for the new vulerability. It does not do much except
show the words 'Vulnerable' for recent x86 cores.

Intel cores prior to family 6 are known not to be vulnerable, and so are
some Atoms and some Xeon Phi.

It assumes that older Cyrix, Centaur, etc. cores are immune.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-system-cpu      |  1 +
 arch/x86/include/asm/cpufeatures.h            |  1 +
 arch/x86/kernel/cpu/bugs.c                    |  5 ++++
 arch/x86/kernel/cpu/common.c                  | 23 +++++++++++++++++++
 drivers/base/cpu.c                            |  8 +++++++
 include/linux/cpu.h                           |  2 ++
 6 files changed, 40 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index ea6a043f5beb..50f95689ab38 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -276,6 +276,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/meltdown
 		/sys/devices/system/cpu/vulnerabilities/spectre_v1
 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index bc76bf39bb2f..08cf4f7a3f7d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -315,5 +315,6 @@
 #define X86_BUG_CPU_MELTDOWN	X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 #define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e71e28154e1a..0ad13b10afcc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -403,4 +403,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
 }
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 97558d1f9ef6..eb78ddf13f44 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -835,10 +835,33 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
 	{}
 };
 
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_PINEVIEW	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_LINCROFT	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_PENWELL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_CLOVERVIEW	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_CEDARVIEW	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_CORE_YONAH		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
+	{ X86_VENDOR_CENTAUR,	5,					},
+	{ X86_VENDOR_INTEL,	5,					},
+	{ X86_VENDOR_NSC,	5,					},
+	{ X86_VENDOR_ANY,	4,					},
+	{}
+};
+
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
 
+	if (!x86_match_cpu(cpu_no_spec_store_bypass))
+		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
 	if (x86_match_cpu(cpu_no_speculation))
 		return;
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 3db71afbba93..143edea1076f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -518,14 +518,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
+					  struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
 	&dev_attr_spectre_v1.attr,
 	&dev_attr_spectre_v2.attr,
+	&dev_attr_spec_store_bypass.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7e04bcd9af8e..2f9d12022100 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -46,6 +46,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
 				   struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_spectre_v2(struct device *dev,
 				   struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+					  struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,

From 1cdf94bc21610ffbabedd5b6d85700ed1017037d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:34:39 -0700
Subject: [PATCH 056/519] x86/cpufeatures: Add X86_FEATURE_RDS

commit 0cc5fa00b0a88dad140b4e5c2cead9951ad36822 upstream

Add the CPU feature bit CPUID.7.0.EDX[31] which indicates whether the CPU
supports Reduced Data Speculation.

[ tglx: Split it out from a later patch ]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 08cf4f7a3f7d..3fce65d4de78 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -297,6 +297,7 @@
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_RDS			(18*32+31) /* Reduced Data Speculation */
 
 /*
  * BUG word(s)

From 46ea6e547d0595f88086bc56c2f032b0e2f3f9ac Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:34:47 -0700
Subject: [PATCH 057/519] x86/bugs: Provide boot parameters for the
 spec_store_bypass_disable mitigation

commit 24f7fc83b9204d20f878c57cb77d261ae825e033 upstream

Contemporary high performance processors use a common industry-wide
optimization known as "Speculative Store Bypass" in which loads from
addresses to which a recent store has occurred may (speculatively) see an
older value. Intel refers to this feature as "Memory Disambiguation" which
is part of their "Smart Memory Access" capability.

Memory Disambiguation can expose a cache side-channel attack against such
speculatively read values. An attacker can create exploit code that allows
them to read memory outside of a sandbox environment (for example,
malicious JavaScript in a web page), or to perform more complex attacks
against code running within the same privilege level, e.g. via the stack.

As a first step to mitigate against such attacks, provide two boot command
line control knobs:

 nospec_store_bypass_disable
 spec_store_bypass_disable=[off,auto,on]

By default affected x86 processors will power on with Speculative
Store Bypass enabled. Hence the provided kernel parameters are written
from the point of view of whether to enable a mitigation or not.
The parameters are as follows:

 - auto - Kernel detects whether your CPU model contains an implementation
	  of Speculative Store Bypass and picks the most appropriate
	  mitigation.

 - on   - disable Speculative Store Bypass
 - off  - enable Speculative Store Bypass

[ tglx: Reordered the checks so that the whole evaluation is not done
  	when the CPU does not support RDS ]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt  |  33 +++++++++
 arch/x86/include/asm/cpufeatures.h   |   1 +
 arch/x86/include/asm/nospec-branch.h |   6 ++
 arch/x86/kernel/cpu/bugs.c           | 103 +++++++++++++++++++++++++++
 4 files changed, 143 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e60d0b5809c1..dc138b8d9ecb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2460,6 +2460,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			allow data leaks with this option, which is equivalent
 			to spectre_v2=off.
 
+	nospec_store_bypass_disable
+			[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -3623,6 +3626,36 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Not specifying this option is equivalent to
 			spectre_v2=auto.
 
+	spec_store_bypass_disable=
+			[HW] Control Speculative Store Bypass (SSB) Disable mitigation
+			(Speculative Store Bypass vulnerability)
+
+			Certain CPUs are vulnerable to an exploit against a
+			a common industry wide performance optimization known
+			as "Speculative Store Bypass" in which recent stores
+			to the same memory location may not be observed by
+			later loads during speculative execution. The idea
+			is that such stores are unlikely and that they can
+			be detected prior to instruction retirement at the
+			end of a particular speculation execution window.
+
+			In vulnerable processors, the speculatively forwarded
+			store can be used in a cache side channel attack, for
+			example to read memory to which the attacker does not
+			directly have access (e.g. inside sandboxed code).
+
+			This parameter controls whether the Speculative Store
+			Bypass optimization is used.
+
+			on     - Unconditionally disable Speculative Store Bypass
+			off    - Unconditionally enable Speculative Store Bypass
+			auto   - Kernel detects whether the CPU model contains an
+				 implementation of Speculative Store Bypass and
+				 picks the most appropriate mitigation
+
+			Not specifying this option is equivalent to
+			spec_store_bypass_disable=auto.
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3fce65d4de78..9510f5f075c6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,6 +203,7 @@
 
 #define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
 #define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 11db69a965dc..c786d01faf51 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -193,6 +193,12 @@ extern u64 x86_spec_ctrl_get_default(void);
 extern void x86_spec_ctrl_set_guest(u64);
 extern void x86_spec_ctrl_restore_host(u64);
 
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+	SPEC_STORE_BYPASS_NONE,
+	SPEC_STORE_BYPASS_DISABLE,
+};
+
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0ad13b10afcc..826aa81a1b37 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -26,6 +26,7 @@
 #include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
 
 /*
  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
@@ -52,6 +53,12 @@ void __init check_bugs(void)
 	/* Select the proper spectre mitigation before patching alternatives */
 	spectre_v2_select_mitigation();
 
+	/*
+	 * Select proper mitigation for any exposure to the Speculative Store
+	 * Bypass vulnerability.
+	 */
+	ssb_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -356,6 +363,99 @@ static void __init spectre_v2_select_mitigation(void)
 	}
 }
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+	SPEC_STORE_BYPASS_CMD_NONE,
+	SPEC_STORE_BYPASS_CMD_AUTO,
+	SPEC_STORE_BYPASS_CMD_ON,
+};
+
+static const char *ssb_strings[] = {
+	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
+	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled"
+};
+
+static const struct {
+	const char *option;
+	enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+	{ "on",		SPEC_STORE_BYPASS_CMD_ON },   /* Disable Speculative Store Bypass */
+	{ "off",	SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+	enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+	char arg[20];
+	int ret, i;
+
+	if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+		return SPEC_STORE_BYPASS_CMD_NONE;
+	} else {
+		ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+					  arg, sizeof(arg));
+		if (ret < 0)
+			return SPEC_STORE_BYPASS_CMD_AUTO;
+
+		for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+			if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+				continue;
+
+			cmd = ssb_mitigation_options[i].cmd;
+			break;
+		}
+
+		if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+			return SPEC_STORE_BYPASS_CMD_AUTO;
+		}
+	}
+
+	return cmd;
+}
+
+static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
+{
+	enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+	enum ssb_mitigation_cmd cmd;
+
+	if (!boot_cpu_has(X86_FEATURE_RDS))
+		return mode;
+
+	cmd = ssb_parse_cmdline();
+	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+	    (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+		return mode;
+
+	switch (cmd) {
+	case SPEC_STORE_BYPASS_CMD_AUTO:
+	case SPEC_STORE_BYPASS_CMD_ON:
+		mode = SPEC_STORE_BYPASS_DISABLE;
+		break;
+	case SPEC_STORE_BYPASS_CMD_NONE:
+		break;
+	}
+
+	if (mode != SPEC_STORE_BYPASS_NONE)
+		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+	return mode;
+}
+
+static void ssb_select_mitigation()
+{
+	ssb_mode = __ssb_select_mitigation();
+
+	if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+		pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
@@ -382,6 +482,9 @@ ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
 			       spectre_v2_module_string());
 
+	case X86_BUG_SPEC_STORE_BYPASS:
+		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
 	default:
 		break;
 	}

From 7dc950c1ce909c11c3985802b1aba6b655d8dc23 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:34:55 -0700
Subject: [PATCH 058/519] x86/bugs/intel: Set proper CPU features and setup RDS

commit 772439717dbf703b39990be58d8d4e3e4ad0598a upstream

Intel CPUs expose methods to:

 - Detect whether RDS capability is available via CPUID.7.0.EDX[31],

 - The SPEC_CTRL MSR(0x48), bit 2 set to enable RDS.

 - MSR_IA32_ARCH_CAPABILITIES, Bit(4) no need to enable RRS.

With that in mind if spec_store_bypass_disable=[auto,on] is selected set at
boot-time the SPEC_CTRL MSR to enable RDS if the platform requires it.

Note that this does not fix the KVM case where the SPEC_CTRL is exposed to
guests which can muck with it, see patch titled :
 KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS.

And for the firmware (IBRS to be set), see patch titled:
 x86/spectre_v2: Read SPEC_CTRL MSR during boot and re-use reserved bits

[ tglx: Distangled it from the intel implementation and kept the call order ]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h |  6 ++++++
 arch/x86/kernel/cpu/bugs.c       | 30 ++++++++++++++++++++++++++++--
 arch/x86/kernel/cpu/common.c     | 10 ++++++----
 arch/x86/kernel/cpu/cpu.h        |  3 +++
 arch/x86/kernel/cpu/intel.c      |  1 +
 5 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f4701f0e613a..a29edb723431 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -35,6 +35,7 @@
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
 #define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
 #define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_RDS			(1 << 2)   /* Reduced Data Speculation */
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
@@ -56,6 +57,11 @@
 #define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
 #define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
 #define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
+#define ARCH_CAP_RDS_NO			(1 << 4)   /*
+						    * Not susceptible to Speculative Store Bypass
+						    * attack, so no Reduced Data Speculation control
+						    * required.
+						    */
 
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 826aa81a1b37..56b84a5506d7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -116,7 +116,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
 
 void x86_spec_ctrl_set(u64 val)
 {
-	if (val & ~SPEC_CTRL_IBRS)
+	if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS))
 		WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
 	else
 		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
@@ -443,8 +443,28 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 		break;
 	}
 
-	if (mode != SPEC_STORE_BYPASS_NONE)
+	/*
+	 * We have three CPU feature flags that are in play here:
+	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+	 *  - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass
+	 *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+	 */
+	if (mode != SPEC_STORE_BYPASS_NONE) {
 		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+		/*
+		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+		 * a completely different MSR and bit dependent on family.
+		 */
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			x86_spec_ctrl_base |= SPEC_CTRL_RDS;
+			x86_spec_ctrl_set(SPEC_CTRL_RDS);
+			break;
+		case X86_VENDOR_AMD:
+			break;
+		}
+	}
+
 	return mode;
 }
 
@@ -458,6 +478,12 @@ static void ssb_select_mitigation()
 
 #undef pr_fmt
 
+void x86_spec_ctrl_setup_ap(void)
+{
+	if (boot_cpu_has(X86_FEATURE_IBRS))
+		x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS));
+}
+
 #ifdef CONFIG_SYSFS
 
 ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index eb78ddf13f44..2f1d403cb0ea 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -859,7 +859,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
 
-	if (!x86_match_cpu(cpu_no_spec_store_bypass))
+	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+	   !(ia32_cap & ARCH_CAP_RDS_NO))
 		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
 	if (x86_match_cpu(cpu_no_speculation))
@@ -871,9 +875,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	if (x86_match_cpu(cpu_no_meltdown))
 		return;
 
-	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
-
 	/* Rogue Data Cache Load? No! */
 	if (ia32_cap & ARCH_CAP_RDCL_NO)
 		return;
@@ -1216,6 +1217,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
 	enable_sep_cpu();
 #endif
 	mtrr_ap_init();
+	x86_spec_ctrl_setup_ap();
 }
 
 struct msr_range {
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2584265d4745..3b19d82f7932 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+
+extern void x86_spec_ctrl_setup_ap(void);
+
 #endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 77d9f6809b05..ac25d1e5e8e8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -119,6 +119,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_STIBP);
 		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
 		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+		setup_clear_cpu_cap(X86_FEATURE_RDS);
 	}
 
 	/*

From d9a58c4316857347b0ef77e94bde43379c87a746 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:35:03 -0700
Subject: [PATCH 059/519] x86/bugs: Whitelist allowed SPEC_CTRL MSR values

commit 1115a859f33276fe8afb31c60cf9d8e657872558 upstream

Intel and AMD SPEC_CTRL (0x48) MSR semantics may differ in the
future (or in fact use different MSRs for the same functionality).

As such a run-time mechanism is required to whitelist the appropriate MSR
values.

[ tglx: Made the variable __ro_after_init ]
[ Srivatsa: Removed __ro_after_init for 4.4.y ]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 56b84a5506d7..c37e2110d383 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -34,6 +34,12 @@ static void __init ssb_select_mitigation(void);
  */
 static u64 x86_spec_ctrl_base;
 
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS;
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -116,7 +122,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
 
 void x86_spec_ctrl_set(u64 val)
 {
-	if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS))
+	if (val & x86_spec_ctrl_mask)
 		WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
 	else
 		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
@@ -458,6 +464,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 		switch (boot_cpu_data.x86_vendor) {
 		case X86_VENDOR_INTEL:
 			x86_spec_ctrl_base |= SPEC_CTRL_RDS;
+			x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS;
 			x86_spec_ctrl_set(SPEC_CTRL_RDS);
 			break;
 		case X86_VENDOR_AMD:
@@ -481,7 +488,7 @@ static void ssb_select_mitigation()
 void x86_spec_ctrl_setup_ap(void)
 {
 	if (boot_cpu_has(X86_FEATURE_IBRS))
-		x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS));
+		x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
 }
 
 #ifdef CONFIG_SYSFS

From ec5bf1a308faac133951877c8b5fbbb0413529cb Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 14 Jul 2018 02:35:12 -0700
Subject: [PATCH 060/519] x86/bugs/AMD: Add support to disable RDS on Fam[15,
 16, 17]h if requested

commit 764f3c21588a059cd783c6ba0734d4db2d72822d upstream

AMD does not need the Speculative Store Bypass mitigation to be enabled.

The parameters for this are already available and can be done via MSR
C001_1020. Each family uses a different bit in that MSR for this.

[ tglx: Expose the bit mask via a variable and move the actual MSR fiddling
  	into the bugs code as that's the right thing to do and also required
	to prepare for dynamic enable/disable ]

[ Srivatsa: Removed __ro_after_init for 4.4.y ]

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h   |  1 +
 arch/x86/include/asm/nospec-branch.h |  4 ++++
 arch/x86/kernel/cpu/amd.c            | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/bugs.c           | 27 ++++++++++++++++++++++++++-
 arch/x86/kernel/cpu/common.c         |  4 ++++
 5 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 9510f5f075c6..b7cdd1c05132 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -204,6 +204,7 @@
 #define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
 #define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_AMD_RDS	(7*32+24)  /* "" AMD RDS implementation */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c786d01faf51..ac2fdc9666e4 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -199,6 +199,10 @@ enum ssb_mitigation {
 	SPEC_STORE_BYPASS_DISABLE,
 };
 
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_rds_mask;
+
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9b2941422b68..4452f387ed32 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,6 +9,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/nospec-branch.h>
 #include <asm/smp.h>
 #include <asm/pci-direct.h>
 #include <asm/delay.h>
@@ -519,6 +520,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_MWAITX))
 		use_mwaitx_delay();
+
+	if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+		unsigned int bit;
+
+		switch (c->x86) {
+		case 0x15: bit = 54; break;
+		case 0x16: bit = 33; break;
+		case 0x17: bit = 10; break;
+		default: return;
+		}
+		/*
+		 * Try to cache the base value so further operations can
+		 * avoid RMW. If that faults, do not enable RDS.
+		 */
+		if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+			setup_force_cpu_cap(X86_FEATURE_RDS);
+			setup_force_cpu_cap(X86_FEATURE_AMD_RDS);
+			x86_amd_ls_cfg_rds_mask = 1ULL << bit;
+		}
+	}
 }
 
 static void early_init_amd(struct cpuinfo_x86 *c)
@@ -794,6 +815,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
 	if (!cpu_has(c, X86_FEATURE_XENPV))
 		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+	if (boot_cpu_has(X86_FEATURE_AMD_RDS)) {
+		set_cpu_cap(c, X86_FEATURE_RDS);
+		set_cpu_cap(c, X86_FEATURE_AMD_RDS);
+	}
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c37e2110d383..b8911afbd6b8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -40,6 +40,13 @@ static u64 x86_spec_ctrl_base;
  */
 static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS;
 
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu().
+ */
+u64 x86_amd_ls_cfg_base;
+u64 x86_amd_ls_cfg_rds_mask;
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -51,7 +58,8 @@ void __init check_bugs(void)
 
 	/*
 	 * Read the SPEC_CTRL MSR to account for reserved bits which may
-	 * have unknown values.
+	 * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+	 * init code as it is not enumerated and depends on the family.
 	 */
 	if (boot_cpu_has(X86_FEATURE_IBRS))
 		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
@@ -153,6 +161,14 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
 
+static void x86_amd_rds_enable(void)
+{
+	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask;
+
+	if (boot_cpu_has(X86_FEATURE_AMD_RDS))
+		wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -442,6 +458,11 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 
 	switch (cmd) {
 	case SPEC_STORE_BYPASS_CMD_AUTO:
+		/*
+		 * AMD platforms by default don't need SSB mitigation.
+		 */
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+			break;
 	case SPEC_STORE_BYPASS_CMD_ON:
 		mode = SPEC_STORE_BYPASS_DISABLE;
 		break;
@@ -468,6 +489,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 			x86_spec_ctrl_set(SPEC_CTRL_RDS);
 			break;
 		case X86_VENDOR_AMD:
+			x86_amd_rds_enable();
 			break;
 		}
 	}
@@ -489,6 +511,9 @@ void x86_spec_ctrl_setup_ap(void)
 {
 	if (boot_cpu_has(X86_FEATURE_IBRS))
 		x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
+
+	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+		x86_amd_rds_enable();
 }
 
 #ifdef CONFIG_SYSFS
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2f1d403cb0ea..7405c8653f7a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -851,6 +851,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
 	{ X86_VENDOR_CENTAUR,	5,					},
 	{ X86_VENDOR_INTEL,	5,					},
 	{ X86_VENDOR_NSC,	5,					},
+	{ X86_VENDOR_AMD,	0x12,					},
+	{ X86_VENDOR_AMD,	0x11,					},
+	{ X86_VENDOR_AMD,	0x10,					},
+	{ X86_VENDOR_AMD,	0xf,					},
 	{ X86_VENDOR_ANY,	4,					},
 	{}
 };

From 49d8e36618f7524611409b8608dd54d399e7097f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:35:20 -0700
Subject: [PATCH 061/519] x86/speculation: Create spec-ctrl.h to avoid include
 hell

commit 28a2775217b17208811fa43a9e96bd1fdf417b86 upstream

Having everything in nospec-branch.h creates a hell of dependencies when
adding the prctl based switching mechanism. Move everything which is not
required in nospec-branch.h to spec-ctrl.h and fix up the includes in the
relevant files.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 14 --------------
 arch/x86/include/asm/spec-ctrl.h     | 21 +++++++++++++++++++++
 arch/x86/kernel/cpu/amd.c            |  2 +-
 arch/x86/kernel/cpu/bugs.c           |  2 +-
 arch/x86/kvm/svm.c                   |  2 +-
 arch/x86/kvm/vmx.c                   |  2 +-
 6 files changed, 25 insertions(+), 18 deletions(-)
 create mode 100644 arch/x86/include/asm/spec-ctrl.h

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ac2fdc9666e4..47c454c29535 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -183,26 +183,12 @@ enum spectre_v2_mitigation {
 extern void x86_spec_ctrl_set(u64);
 extern u64 x86_spec_ctrl_get_default(void);
 
-/*
- * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
- * the guest has, while on VMEXIT we restore the host view. This
- * would be easier if SPEC_CTRL were architecturally maskable or
- * shadowable for guests but this is not (currently) the case.
- * Takes the guest view of SPEC_CTRL MSR as a parameter.
- */
-extern void x86_spec_ctrl_set_guest(u64);
-extern void x86_spec_ctrl_restore_host(u64);
-
 /* The Speculative Store Bypass disable variants */
 enum ssb_mitigation {
 	SPEC_STORE_BYPASS_NONE,
 	SPEC_STORE_BYPASS_DISABLE,
 };
 
-/* AMD specific Speculative Store Bypass MSR data */
-extern u64 x86_amd_ls_cfg_base;
-extern u64 x86_amd_ls_cfg_rds_mask;
-
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..3ad64420a06e
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter.
+ */
+extern void x86_spec_ctrl_set_guest(u64);
+extern void x86_spec_ctrl_restore_host(u64);
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_rds_mask;
+
+#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4452f387ed32..14e984916f2f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,7 +9,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 #include <asm/smp.h>
 #include <asm/pci-direct.h>
 #include <asm/delay.h>
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b8911afbd6b8..47a3cc08a889 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,7 +12,7 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 #include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 42654375b73f..df7827a981dd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -37,7 +37,7 @@
 #include <asm/desc.h>
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 63c44a9bf6bb..18143886b186 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -48,7 +48,7 @@
 #include <asm/kexec.h>
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
 
 #include "trace.h"
 #include "pmu.h"

From 13fa2c65c9a8c2cd5f2a9799891582c40b6f5cfa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:35:28 -0700
Subject: [PATCH 062/519] prctl: Add speculation control prctls

commit b617cfc858161140d69cc0b5cc211996b557a1c7 upstream

Add two new prctls to control aspects of speculation related vulnerabilites
and their mitigations to provide finer grained control over performance
impacting mitigations.

PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
which is selected with arg2 of prctl(2). The return value uses bit 0-2 with
the following meaning:

Bit  Define           Description
0    PR_SPEC_PRCTL    Mitigation can be controlled per task by
                      PR_SET_SPECULATION_CTRL
1    PR_SPEC_ENABLE   The speculation feature is enabled, mitigation is
                      disabled
2    PR_SPEC_DISABLE  The speculation feature is disabled, mitigation is
                      enabled

If all bits are 0 the CPU is not affected by the speculation misfeature.

If PR_SPEC_PRCTL is set, then the per task control of the mitigation is
available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
misfeature will fail.

PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
is selected by arg2 of prctl(2) per task. arg3 is used to hand in the
control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE.

The common return values are:

EINVAL  prctl is not implemented by the architecture or the unused prctl()
        arguments are not 0
ENODEV  arg2 is selecting a not supported speculation misfeature

PR_SET_SPECULATION_CTRL has these additional return values:

ERANGE  arg3 is incorrect, i.e. it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE
ENXIO   prctl control of the selected speculation misfeature is disabled

The first supported controlable speculation misfeature is
PR_SPEC_STORE_BYPASS. Add the define so this can be shared between
architectures.

Based on an initial patch from Tim Chen and mostly rewritten.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/spec_ctrl.txt | 86 +++++++++++++++++++++++++++++++++++++
 include/linux/nospec.h      |  5 +++
 include/uapi/linux/prctl.h  | 11 +++++
 kernel/sys.c                | 20 +++++++++
 4 files changed, 122 insertions(+)
 create mode 100644 Documentation/spec_ctrl.txt

diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt
new file mode 100644
index 000000000000..ddbebcd01208
--- /dev/null
+++ b/Documentation/spec_ctrl.txt
@@ -0,0 +1,86 @@
+===================
+Speculation Control
+===================
+
+Quite some CPUs have speculation related misfeatures which are in fact
+vulnerabilites causing data leaks in various forms even accross privilege
+domains.
+
+The kernel provides mitigation for such vulnerabilities in various
+forms. Some of these mitigations are compile time configurable and some on
+the kernel command line.
+
+There is also a class of mitigations which are very expensive, but they can
+be restricted to a certain set of processes or tasks in controlled
+environments. The mechanism to control these mitigations is via
+:manpage:`prctl(2)`.
+
+There are two prctl options which are related to this:
+
+ * PR_GET_SPECULATION_CTRL
+
+ * PR_SET_SPECULATION_CTRL
+
+PR_GET_SPECULATION_CTRL
+-----------------------
+
+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
+which is selected with arg2 of prctl(2). The return value uses bits 0-2 with
+the following meaning:
+
+==== ================ ===================================================
+Bit  Define           Description
+==== ================ ===================================================
+0    PR_SPEC_PRCTL    Mitigation can be controlled per task by
+                      PR_SET_SPECULATION_CTRL
+1    PR_SPEC_ENABLE   The speculation feature is enabled, mitigation is
+                      disabled
+2    PR_SPEC_DISABLE  The speculation feature is disabled, mitigation is
+                      enabled
+==== ================ ===================================================
+
+If all bits are 0 the CPU is not affected by the speculation misfeature.
+
+If PR_SPEC_PRCTL is set, then the per task control of the mitigation is
+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
+misfeature will fail.
+
+PR_SET_SPECULATION_CTRL
+-----------------------
+PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
+is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE.
+
+Common error codes
+------------------
+======= =================================================================
+Value   Meaning
+======= =================================================================
+EINVAL  The prctl is not implemented by the architecture or unused
+        prctl(2) arguments are not 0
+
+ENODEV  arg2 is selecting a not supported speculation misfeature
+======= =================================================================
+
+PR_SET_SPECULATION_CTRL error codes
+-----------------------------------
+======= =================================================================
+Value   Meaning
+======= =================================================================
+0       Success
+
+ERANGE  arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
+        PR_SPEC_DISABLE
+
+ENXIO   Control of the selected speculation misfeature is not possible.
+        See PR_GET_SPECULATION_CTRL.
+======= =================================================================
+
+Speculation misfeature controls
+-------------------------------
+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
+
+  Invocations:
+   * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index e791ebc65c9c..700bb8a4e4ea 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 									\
 	(typeof(_i)) (_i & _mask);					\
 })
+
+/* Speculation control prctl */
+int arch_prctl_spec_ctrl_get(unsigned long which);
+int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl);
+
 #endif /* _LINUX_NOSPEC_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759a9e40..3b316be71c56 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,15 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL		52
+#define PR_SET_SPECULATION_CTRL		53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS		0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED		0
+# define PR_SPEC_PRCTL			(1UL << 0)
+# define PR_SPEC_ENABLE			(1UL << 1)
+# define PR_SPEC_DISABLE		(1UL << 2)
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 6624919ef0e7..d80c33f9aff7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2075,6 +2075,16 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 }
 #endif
 
+int __weak arch_prctl_spec_ctrl_get(unsigned long which)
+{
+	return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl)
+{
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2269,6 +2279,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_FP_MODE:
 		error = GET_FP_MODE(me);
 		break;
+	case PR_GET_SPECULATION_CTRL:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_spec_ctrl_get(arg2);
+		break;
+	case PR_SET_SPECULATION_CTRL:
+		if (arg4 || arg5)
+			return -EINVAL;
+		error = arch_prctl_spec_ctrl_set(arg2, arg3);
+		break;
 	default:
 		error = -EINVAL;
 		break;

From 1a529899c65aff1f4f2f4875e876457d0c2341c5 Mon Sep 17 00:00:00 2001
From: Kyle Huey <me@kylehuey.com>
Date: Sat, 14 Jul 2018 02:35:36 -0700
Subject: [PATCH 063/519] x86/process: Optimize TIF checks in
 __switch_to_xtra()

commit af8b3cd3934ec60f4c2a420d19a9d416554f140b upstream

Help the compiler to avoid reevaluating the thread flags for each checked
bit by reordering the bit checks and providing an explicit xor for
evaluation.

With default defconfigs for each arch,

x86_64: arch/x86/kernel/process.o
text       data     bss     dec     hex
3056       8577      16   11649    2d81	Before
3024	   8577      16	  11617	   2d61	After

i386: arch/x86/kernel/process.o
text       data     bss     dec     hex
2957	   8673	      8	  11638	   2d76	Before
2925	   8673       8	  11606	   2d56	After

Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kyle Huey <khuey@kylehuey.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

[dwmw2: backported to make TIF_RDS handling simpler.
        No deferred TR reload.]
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/process.c | 66 ++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7c5c5dc90ffa..cc0f28878d09 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -188,48 +188,56 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-		      struct tss_struct *tss)
+static inline void switch_to_bitmap(struct tss_struct *tss,
+				    struct thread_struct *prev,
+				    struct thread_struct *next,
+				    unsigned long tifp, unsigned long tifn)
 {
-	struct thread_struct *prev, *next;
-
-	prev = &prev_p->thread;
-	next = &next_p->thread;
-
-	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
-	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
-			debugctl |= DEBUGCTLMSR_BTF;
-
-		update_debugctlmsr(debugctl);
-	}
-
-	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-		/* prev and next are different */
-		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
-	}
-
-	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+	if (tifn & _TIF_IO_BITMAP) {
 		/*
 		 * Copy the relevant range of the IO bitmap.
 		 * Normally this is 128 bytes or less:
 		 */
 		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 		       max(prev->io_bitmap_max, next->io_bitmap_max));
-	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+	} else if (tifp & _TIF_IO_BITMAP) {
 		/*
 		 * Clear any possible leftover bits:
 		 */
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+		      struct tss_struct *tss)
+{
+	struct thread_struct *prev, *next;
+	unsigned long tifp, tifn;
+
+	prev = &prev_p->thread;
+	next = &next_p->thread;
+
+	tifn = READ_ONCE(task_thread_info(next_p)->flags);
+	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+	switch_to_bitmap(tss, prev, next, tifp, tifn);
+
 	propagate_user_return_notify(prev_p, next_p);
+
+	if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
+		unsigned long debugctl = get_debugctlmsr();
+
+		debugctl &= ~DEBUGCTLMSR_BTF;
+		if (tifn & _TIF_BLOCKSTEP)
+			debugctl |= DEBUGCTLMSR_BTF;
+		update_debugctlmsr(debugctl);
+	}
+
+	if ((tifp ^ tifn) & _TIF_NOTSC) {
+		if (tifn & _TIF_NOTSC)
+			hard_disable_TSC();
+		else
+			hard_enable_TSC();
+	}
 }
 
 /*

From 5c5e95c4e50fbec4f101e057520a762662d6e7d7 Mon Sep 17 00:00:00 2001
From: Kyle Huey <me@kylehuey.com>
Date: Sat, 14 Jul 2018 02:35:44 -0700
Subject: [PATCH 064/519] x86/process: Correct and optimize TIF_BLOCKSTEP
 switch

commit b9894a2f5bd18b1691cb6872c9afe32b148d0132 upstream

The debug control MSR is "highly magical" as the blockstep bit can be
cleared by hardware under not well documented circumstances.

So a task switch relying on the bit set by the previous task (according to
the previous tasks thread flags) can trip over this and not update the flag
for the next task.

To fix this its required to handle DEBUGCTLMSR_BTF when either the previous
or the next or both tasks have the TIF_BLOCKSTEP flag set.

While at it avoid branching within the TIF_BLOCKSTEP case and evaluating
boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR.

x86_64: arch/x86/kernel/process.o
text	data	bss	dec	 hex
3024    8577    16      11617    2d61	Before
3008	8577	16	11601	 2d51	After

i386: No change

[ tglx: Made the shift value explicit, use a local variable to make the
code readable and massaged changelog]

Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kyle Huey <khuey@kylehuey.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h |  1 +
 arch/x86/kernel/process.c        | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a29edb723431..71a2c84013b3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -150,6 +150,7 @@
 
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT		1
 #define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
 #define DEBUGCTLMSR_TR			(1UL <<  6)
 #define DEBUGCTLMSR_BTS			(1UL <<  7)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cc0f28878d09..166aef38ec98 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -223,13 +223,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 
 	propagate_user_return_notify(prev_p, next_p);
 
-	if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
-		unsigned long debugctl = get_debugctlmsr();
+	if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+	    arch_has_block_step()) {
+		unsigned long debugctl, msk;
 
+		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 		debugctl &= ~DEBUGCTLMSR_BTF;
-		if (tifn & _TIF_BLOCKSTEP)
-			debugctl |= DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
+		msk = tifn & _TIF_BLOCKSTEP;
+		debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	}
 
 	if ((tifp ^ tifn) & _TIF_NOTSC) {

From a1cb23a5e2ea4ed75d3ac37c6a0739c5435406ff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:35:52 -0700
Subject: [PATCH 065/519] x86/process: Optimize TIF_NOTSC switch

commit 5a920155e388ec22a22e0532fb695b9215c9b34d upstream

Provide and use a toggle helper instead of doing it with a branch.

x86_64: arch/x86/kernel/process.o
text	   data	    bss	    dec	    hex
3008	   8577	     16	  11601	   2d51 Before
2976       8577      16	  11569	   2d31 After

i386: arch/x86/kernel/process.o
text	   data	    bss	    dec	    hex
2925	   8673	      8	  11606	   2d56 Before
2893	   8673       8	  11574	   2d36 After

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Link: http://lkml.kernel.org/r/20170214081104.9244-4-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/tlbflush.h | 10 ++++++++++
 arch/x86/kernel/process.c       | 22 ++++------------------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8ce07db77299..72cfe3e53af1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -111,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask)
 	}
 }
 
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+	unsigned long cr4;
+
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	cr4 ^= mask;
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Read the CR4 shadow. */
 static inline unsigned long cr4_read_shadow(void)
 {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 166aef38ec98..d112963902fe 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -130,11 +130,6 @@ void flush_thread(void)
 	fpu__clear(&tsk->thread.fpu);
 }
 
-static void hard_disable_TSC(void)
-{
-	cr4_set_bits(X86_CR4_TSD);
-}
-
 void disable_TSC(void)
 {
 	preempt_disable();
@@ -143,15 +138,10 @@ void disable_TSC(void)
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
-		hard_disable_TSC();
+		cr4_set_bits(X86_CR4_TSD);
 	preempt_enable();
 }
 
-static void hard_enable_TSC(void)
-{
-	cr4_clear_bits(X86_CR4_TSD);
-}
-
 static void enable_TSC(void)
 {
 	preempt_disable();
@@ -160,7 +150,7 @@ static void enable_TSC(void)
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOTSC in the current running context.
 		 */
-		hard_enable_TSC();
+		cr4_clear_bits(X86_CR4_TSD);
 	preempt_enable();
 }
 
@@ -234,12 +224,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	}
 
-	if ((tifp ^ tifn) & _TIF_NOTSC) {
-		if (tifn & _TIF_NOTSC)
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
-	}
+	if ((tifp ^ tifn) & _TIF_NOTSC)
+		cr4_toggle_bits(X86_CR4_TSD);
 }
 
 /*

From b04a020d0745a7ba18800e86ea678676aeb21278 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:36:00 -0700
Subject: [PATCH 066/519] x86/process: Allow runtime control of Speculative
 Store Bypass

commit 885f82bfbc6fefb6664ea27965c3ab9ac4194b8c upstream

The Speculative Store Bypass vulnerability can be mitigated with the
Reduced Data Speculation (RDS) feature. To allow finer grained control of
this eventually expensive mitigation a per task mitigation control is
required.

Add a new TIF_RDS flag and put it into the group of TIF flags which are
evaluated for mismatch in switch_to(). If these bits differ in the previous
and the next task, then the slow path function __switch_to_xtra() is
invoked. Implement the TIF_RDS dependent mitigation control in the slow
path.

If the prctl for controlling Speculative Store Bypass is disabled or no
task uses the prctl then there is no overhead in the switch_to() fast
path.

Update the KVM related speculation control functions to take TID_RDS into
account as well.

Based on a patch from Tim Chen. Completely rewritten.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h   |  3 ++-
 arch/x86/include/asm/spec-ctrl.h   | 17 +++++++++++++++++
 arch/x86/include/asm/thread_info.h |  6 ++++--
 arch/x86/kernel/cpu/bugs.c         | 26 +++++++++++++++++++++-----
 arch/x86/kernel/process.c          | 22 ++++++++++++++++++++++
 5 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 71a2c84013b3..883cf0d6b1f9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -35,7 +35,8 @@
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
 #define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
 #define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
-#define SPEC_CTRL_RDS			(1 << 2)   /* Reduced Data Speculation */
+#define SPEC_CTRL_RDS_SHIFT		2	   /* Reduced Data Speculation bit */
+#define SPEC_CTRL_RDS			(1 << SPEC_CTRL_RDS_SHIFT)   /* Reduced Data Speculation */
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 3ad64420a06e..45ef00ad5105 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_X86_SPECCTRL_H_
 #define _ASM_X86_SPECCTRL_H_
 
+#include <linux/thread_info.h>
 #include <asm/nospec-branch.h>
 
 /*
@@ -18,4 +19,20 @@ extern void x86_spec_ctrl_restore_host(u64);
 extern u64 x86_amd_ls_cfg_base;
 extern u64 x86_amd_ls_cfg_rds_mask;
 
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
+static inline u64 rds_tif_to_spec_ctrl(u64 tifn)
+{
+	BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT);
+	return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT);
+}
+
+static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn)
+{
+	return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL;
+}
+
+extern void speculative_store_bypass_update(void);
+
 #endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 18c9aaa8c043..36a49b4ba4b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,6 +92,7 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
+#define TIF_RDS			5	/* Reduced data speculation */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -114,8 +115,9 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_RDS		(1 << TIF_RDS)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
@@ -147,7 +149,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 47a3cc08a889..0f8303ec0532 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -32,7 +32,7 @@ static void __init ssb_select_mitigation(void);
  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
  * writes to SPEC_CTRL contain whatever reserved bits have been set.
  */
-static u64 x86_spec_ctrl_base;
+u64 x86_spec_ctrl_base;
 
 /*
  * The vendor and possibly platform specific bits which can be modified in
@@ -139,25 +139,41 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_set);
 
 u64 x86_spec_ctrl_get_default(void)
 {
-	return x86_spec_ctrl_base;
+	u64 msrval = x86_spec_ctrl_base;
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags);
+	return msrval;
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
 
 void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
 {
+	u64 host = x86_spec_ctrl_base;
+
 	if (!boot_cpu_has(X86_FEATURE_IBRS))
 		return;
-	if (x86_spec_ctrl_base != guest_spec_ctrl)
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		host |= rds_tif_to_spec_ctrl(current_thread_info()->flags);
+
+	if (host != guest_spec_ctrl)
 		wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
 
 void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
 {
+	u64 host = x86_spec_ctrl_base;
+
 	if (!boot_cpu_has(X86_FEATURE_IBRS))
 		return;
-	if (x86_spec_ctrl_base != guest_spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		host |= rds_tif_to_spec_ctrl(current_thread_info()->flags);
+
+	if (host != guest_spec_ctrl)
+		wrmsrl(MSR_IA32_SPEC_CTRL, host);
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d112963902fe..9689e92e72dd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -31,6 +31,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/vm86.h>
+#include <asm/spec-ctrl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -198,6 +199,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
 	}
 }
 
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+	u64 msr;
+
+	if (static_cpu_has(X86_FEATURE_AMD_RDS)) {
+		msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn);
+		wrmsrl(MSR_AMD64_LS_CFG, msr);
+	} else {
+		msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn);
+		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+	}
+}
+
+void speculative_store_bypass_update(void)
+{
+	__speculative_store_bypass_update(current_thread_info()->flags);
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss)
 {
@@ -226,6 +245,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 
 	if ((tifp ^ tifn) & _TIF_NOTSC)
 		cr4_toggle_bits(X86_CR4_TSD);
+
+	if ((tifp ^ tifn) & _TIF_RDS)
+		__speculative_store_bypass_update(tifn);
 }
 
 /*

From 2cb00ce1273d48dafce848f4e0ea353eb5839475 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:36:09 -0700
Subject: [PATCH 067/519] x86/speculation: Add prctl for Speculative Store
 Bypass mitigation

commit a73ec77ee17ec556fe7f165d00314cb7c047b1ac upstream

Add prctl based control for Speculative Store Bypass mitigation and make it
the default mitigation for Intel and AMD.

Andi Kleen provided the following rationale (slightly redacted):

 There are multiple levels of impact of Speculative Store Bypass:

 1) JITed sandbox.
    It cannot invoke system calls, but can do PRIME+PROBE and may have call
    interfaces to other code

 2) Native code process.
    No protection inside the process at this level.

 3) Kernel.

 4) Between processes.

 The prctl tries to protect against case (1) doing attacks.

 If the untrusted code can do random system calls then control is already
 lost in a much worse way. So there needs to be system call protection in
 some way (using a JIT not allowing them or seccomp). Or rather if the
 process can subvert its environment somehow to do the prctl it can already
 execute arbitrary code, which is much worse than SSB.

 To put it differently, the point of the prctl is to not allow JITed code
 to read data it shouldn't read from its JITed sandbox. If it already has
 escaped its sandbox then it can already read everything it wants in its
 address space, and do much worse.

 The ability to control Speculative Store Bypass allows to enable the
 protection selectively without affecting overall system performance.

Based on an initial patch from Tim Chen. Completely rewritten.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt  |  6 +-
 arch/x86/include/asm/nospec-branch.h |  1 +
 arch/x86/kernel/cpu/bugs.c           | 83 ++++++++++++++++++++++++----
 3 files changed, 79 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dc138b8d9ecb..80202debbdbe 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3651,7 +3651,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			off    - Unconditionally enable Speculative Store Bypass
 			auto   - Kernel detects whether the CPU model contains an
 				 implementation of Speculative Store Bypass and
-				 picks the most appropriate mitigation
+				 picks the most appropriate mitigation.
+			prctl  - Control Speculative Store Bypass per thread
+				 via prctl. Speculative Store Bypass is enabled
+				 for a process by default. The state of the control
+				 is inherited on fork.
 
 			Not specifying this option is equivalent to
 			spec_store_bypass_disable=auto.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 47c454c29535..155d955ab801 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -187,6 +187,7 @@ extern u64 x86_spec_ctrl_get_default(void);
 enum ssb_mitigation {
 	SPEC_STORE_BYPASS_NONE,
 	SPEC_STORE_BYPASS_DISABLE,
+	SPEC_STORE_BYPASS_PRCTL,
 };
 
 extern char __indirect_thunk_start[];
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0f8303ec0532..bcfccd3d6542 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,6 +11,8 @@
 #include <linux/utsname.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
 
 #include <asm/spec-ctrl.h>
 #include <asm/cmdline.h>
@@ -411,20 +413,23 @@ enum ssb_mitigation_cmd {
 	SPEC_STORE_BYPASS_CMD_NONE,
 	SPEC_STORE_BYPASS_CMD_AUTO,
 	SPEC_STORE_BYPASS_CMD_ON,
+	SPEC_STORE_BYPASS_CMD_PRCTL,
 };
 
 static const char *ssb_strings[] = {
 	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
-	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled"
+	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
+	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl"
 };
 
 static const struct {
 	const char *option;
 	enum ssb_mitigation_cmd cmd;
 } ssb_mitigation_options[] = {
-	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
-	{ "on",		SPEC_STORE_BYPASS_CMD_ON },   /* Disable Speculative Store Bypass */
-	{ "off",	SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },  /* Platform decides */
+	{ "on",		SPEC_STORE_BYPASS_CMD_ON },    /* Disable Speculative Store Bypass */
+	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },  /* Don't touch Speculative Store Bypass */
+	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
 };
 
 static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
@@ -474,14 +479,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 
 	switch (cmd) {
 	case SPEC_STORE_BYPASS_CMD_AUTO:
-		/*
-		 * AMD platforms by default don't need SSB mitigation.
-		 */
-		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-			break;
+		/* Choose prctl as the default mode */
+		mode = SPEC_STORE_BYPASS_PRCTL;
+		break;
 	case SPEC_STORE_BYPASS_CMD_ON:
 		mode = SPEC_STORE_BYPASS_DISABLE;
 		break;
+	case SPEC_STORE_BYPASS_CMD_PRCTL:
+		mode = SPEC_STORE_BYPASS_PRCTL;
+		break;
 	case SPEC_STORE_BYPASS_CMD_NONE:
 		break;
 	}
@@ -492,7 +498,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 	 *  - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass
 	 *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
 	 */
-	if (mode != SPEC_STORE_BYPASS_NONE) {
+	if (mode == SPEC_STORE_BYPASS_DISABLE) {
 		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
 		/*
 		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
@@ -523,6 +529,63 @@ static void ssb_select_mitigation()
 
 #undef pr_fmt
 
+static int ssb_prctl_set(unsigned long ctrl)
+{
+	bool rds = !!test_tsk_thread_flag(current, TIF_RDS);
+
+	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL)
+		return -ENXIO;
+
+	if (ctrl == PR_SPEC_ENABLE)
+		clear_tsk_thread_flag(current, TIF_RDS);
+	else
+		set_tsk_thread_flag(current, TIF_RDS);
+
+	if (rds != !!test_tsk_thread_flag(current, TIF_RDS))
+		speculative_store_bypass_update();
+
+	return 0;
+}
+
+static int ssb_prctl_get(void)
+{
+	switch (ssb_mode) {
+	case SPEC_STORE_BYPASS_DISABLE:
+		return PR_SPEC_DISABLE;
+	case SPEC_STORE_BYPASS_PRCTL:
+		if (test_tsk_thread_flag(current, TIF_RDS))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	default:
+		if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+			return PR_SPEC_ENABLE;
+		return PR_SPEC_NOT_AFFECTED;
+	}
+}
+
+int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl)
+{
+	if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE)
+		return -ERANGE;
+
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssb_prctl_set(ctrl);
+	default:
+		return -ENODEV;
+	}
+}
+
+int arch_prctl_spec_ctrl_get(unsigned long which)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssb_prctl_get();
+	default:
+		return -ENODEV;
+	}
+}
+
 void x86_spec_ctrl_setup_ap(void)
 {
 	if (boot_cpu_has(X86_FEATURE_IBRS))

From b6f4a6285d7979b45d629e65c880279930b98ef1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 14 Jul 2018 02:36:17 -0700
Subject: [PATCH 068/519] nospec: Allow getting/setting on non-current task

commit 7bbf1373e228840bb0295a2ca26d548ef37f448e upstream

Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than
current.

This is needed both for /proc/$pid/status queries and for seccomp (since
thread-syncing can trigger seccomp in non-current threads).

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++-----------
 include/linux/nospec.h     |  7 +++++--
 kernel/sys.c               |  9 +++++----
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bcfccd3d6542..64b54a4c30f5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -529,31 +529,35 @@ static void ssb_select_mitigation()
 
 #undef pr_fmt
 
-static int ssb_prctl_set(unsigned long ctrl)
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
-	bool rds = !!test_tsk_thread_flag(current, TIF_RDS);
+	bool rds = !!test_tsk_thread_flag(task, TIF_RDS);
 
 	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL)
 		return -ENXIO;
 
 	if (ctrl == PR_SPEC_ENABLE)
-		clear_tsk_thread_flag(current, TIF_RDS);
+		clear_tsk_thread_flag(task, TIF_RDS);
 	else
-		set_tsk_thread_flag(current, TIF_RDS);
+		set_tsk_thread_flag(task, TIF_RDS);
 
-	if (rds != !!test_tsk_thread_flag(current, TIF_RDS))
+	/*
+	 * If being set on non-current task, delay setting the CPU
+	 * mitigation until it is next scheduled.
+	 */
+	if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS))
 		speculative_store_bypass_update();
 
 	return 0;
 }
 
-static int ssb_prctl_get(void)
+static int ssb_prctl_get(struct task_struct *task)
 {
 	switch (ssb_mode) {
 	case SPEC_STORE_BYPASS_DISABLE:
 		return PR_SPEC_DISABLE;
 	case SPEC_STORE_BYPASS_PRCTL:
-		if (test_tsk_thread_flag(current, TIF_RDS))
+		if (test_tsk_thread_flag(task, TIF_RDS))
 			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
 		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
 	default:
@@ -563,24 +567,25 @@ static int ssb_prctl_get(void)
 	}
 }
 
-int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl)
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl)
 {
 	if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE)
 		return -ERANGE;
 
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
-		return ssb_prctl_set(ctrl);
+		return ssb_prctl_set(task, ctrl);
 	default:
 		return -ENODEV;
 	}
 }
 
-int arch_prctl_spec_ctrl_get(unsigned long which)
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 {
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
-		return ssb_prctl_get();
+		return ssb_prctl_get(task);
 	default:
 		return -ENODEV;
 	}
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index 700bb8a4e4ea..a908c954484d 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -7,6 +7,8 @@
 #define _LINUX_NOSPEC_H
 #include <asm/barrier.h>
 
+struct task_struct;
+
 /**
  * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
  * @index: array element index
@@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 })
 
 /* Speculation control prctl */
-int arch_prctl_spec_ctrl_get(unsigned long which);
-int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl);
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl);
 
 #endif /* _LINUX_NOSPEC_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index d80c33f9aff7..f718742e55e6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2075,12 +2075,13 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 }
 #endif
 
-int __weak arch_prctl_spec_ctrl_get(unsigned long which)
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
 {
 	return -EINVAL;
 }
 
-int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl)
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+				    unsigned long ctrl)
 {
 	return -EINVAL;
 }
@@ -2282,12 +2283,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_SPECULATION_CTRL:
 		if (arg3 || arg4 || arg5)
 			return -EINVAL;
-		error = arch_prctl_spec_ctrl_get(arg2);
+		error = arch_prctl_spec_ctrl_get(me, arg2);
 		break;
 	case PR_SET_SPECULATION_CTRL:
 		if (arg4 || arg5)
 			return -EINVAL;
-		error = arch_prctl_spec_ctrl_set(arg2, arg3);
+		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
 		break;
 	default:
 		error = -EINVAL;

From 484964fa3e5a0d8467891aab8368dab34e8eb13c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 14 Jul 2018 02:36:25 -0700
Subject: [PATCH 069/519] proc: Provide details on speculation flaw mitigations

commit fae1fa0fc6cca8beee3ab8ed71d54f9a78fa3f64 upstream

As done with seccomp and no_new_privs, also show speculation flaw
mitigation state in /proc/$pid/status.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/array.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index b6c00ce0e29e..bb48358b99a3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -79,6 +79,7 @@
 #include <linux/delayacct.h>
 #include <linux/seq_file.h>
 #include <linux/pid_namespace.h>
+#include <linux/prctl.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/string_helpers.h>
@@ -332,6 +333,28 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 #ifdef CONFIG_SECCOMP
 	seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
 #endif
+	seq_printf(m, "\nSpeculation Store Bypass:\t");
+	switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+	case -EINVAL:
+		seq_printf(m, "unknown");
+		break;
+	case PR_SPEC_NOT_AFFECTED:
+		seq_printf(m, "not vulnerable");
+		break;
+	case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+		seq_printf(m, "thread mitigated");
+		break;
+	case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+		seq_printf(m, "thread vulnerable");
+		break;
+	case PR_SPEC_DISABLE:
+		seq_printf(m, "globally mitigated");
+		break;
+	default:
+		seq_printf(m, "vulnerable");
+		break;
+	}
+	seq_putc(m, '\n');
 }
 
 static inline void task_context_switch_counts(struct seq_file *m,

From 0b1174054e0f4afd999c56ddecbbfb18f598f099 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 14 Jul 2018 02:36:33 -0700
Subject: [PATCH 070/519] seccomp: Enable speculation flaw mitigations

commit 5c3070890d06ff82eecb808d02d2ca39169533ef upstream

When speculation flaw mitigations are opt-in (via prctl), using seccomp
will automatically opt-in to these protections, since using seccomp
indicates at least some level of sandboxing is desired.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/seccomp.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index efd384f3f852..bfb1ee845ba6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -16,6 +16,8 @@
 #include <linux/atomic.h>
 #include <linux/audit.h>
 #include <linux/compat.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
 #include <linux/slab.h>
@@ -214,6 +216,19 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 	return true;
 }
 
+/*
+ * If a given speculation mitigation is opt-in (prctl()-controlled),
+ * select it, by disabling speculation (enabling mitigation).
+ */
+static inline void spec_mitigate(struct task_struct *task,
+				 unsigned long which)
+{
+	int state = arch_prctl_spec_ctrl_get(task, which);
+
+	if (state > 0 && (state & PR_SPEC_PRCTL))
+		arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE);
+}
+
 static inline void seccomp_assign_mode(struct task_struct *task,
 				       unsigned long seccomp_mode)
 {
@@ -225,6 +240,8 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 	 * filter) is set.
 	 */
 	smp_mb__before_atomic();
+	/* Assume seccomp processes want speculation flaw mitigation. */
+	spec_mitigate(task, PR_SPEC_STORE_BYPASS);
 	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 

From 3f9cb20f9126db1edb1fad78a0e94ff8e9ae94e2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:36:41 -0700
Subject: [PATCH 071/519] prctl: Add force disable speculation

commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream

For certain use cases it is desired to enforce mitigations so they cannot
be undone afterwards. That's important for loader stubs which want to
prevent a child from disabling the mitigation again. Will also be used for
seccomp(). The extra state preserving of the prctl state for SSB is a
preparatory step for EBPF dymanic speculation control.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/spec_ctrl.txt | 34 +++++++++++++++++++++-------------
 arch/x86/kernel/cpu/bugs.c  | 35 +++++++++++++++++++++++++----------
 fs/proc/array.c             |  3 +++
 include/linux/sched.h       |  9 +++++++++
 include/uapi/linux/prctl.h  |  1 +
 5 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt
index ddbebcd01208..1b3690d30943 100644
--- a/Documentation/spec_ctrl.txt
+++ b/Documentation/spec_ctrl.txt
@@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL
 -----------------------
 
 PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
-which is selected with arg2 of prctl(2). The return value uses bits 0-2 with
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
 the following meaning:
 
-==== ================ ===================================================
-Bit  Define           Description
-==== ================ ===================================================
-0    PR_SPEC_PRCTL    Mitigation can be controlled per task by
-                      PR_SET_SPECULATION_CTRL
-1    PR_SPEC_ENABLE   The speculation feature is enabled, mitigation is
-                      disabled
-2    PR_SPEC_DISABLE  The speculation feature is disabled, mitigation is
-                      enabled
-==== ================ ===================================================
+==== ===================== ===================================================
+Bit  Define                Description
+==== ===================== ===================================================
+0    PR_SPEC_PRCTL         Mitigation can be controlled per task by
+                           PR_SET_SPECULATION_CTRL
+1    PR_SPEC_ENABLE        The speculation feature is enabled, mitigation is
+                           disabled
+2    PR_SPEC_DISABLE       The speculation feature is disabled, mitigation is
+                           enabled
+3    PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+                           subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
 
 If all bits are 0 the CPU is not affected by the speculation misfeature.
 
@@ -47,9 +49,11 @@ misfeature will fail.
 
 PR_SET_SPECULATION_CTRL
 -----------------------
+
 PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
 is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
-in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE.
+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
+PR_SPEC_FORCE_DISABLE.
 
 Common error codes
 ------------------
@@ -70,10 +74,13 @@ Value   Meaning
 0       Success
 
 ERANGE  arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
-        PR_SPEC_DISABLE
+        PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE
 
 ENXIO   Control of the selected speculation misfeature is not possible.
         See PR_GET_SPECULATION_CTRL.
+
+EPERM   Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
+        tried to enable it again.
 ======= =================================================================
 
 Speculation misfeature controls
@@ -84,3 +91,4 @@ Speculation misfeature controls
    * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 64b54a4c30f5..d6897caa8f47 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -531,21 +531,37 @@ static void ssb_select_mitigation()
 
 static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
-	bool rds = !!test_tsk_thread_flag(task, TIF_RDS);
+	bool update;
 
 	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL)
 		return -ENXIO;
 
-	if (ctrl == PR_SPEC_ENABLE)
-		clear_tsk_thread_flag(task, TIF_RDS);
-	else
-		set_tsk_thread_flag(task, TIF_RDS);
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		/* If speculation is force disabled, enable is not allowed */
+		if (task_spec_ssb_force_disable(task))
+			return -EPERM;
+		task_clear_spec_ssb_disable(task);
+		update = test_and_clear_tsk_thread_flag(task, TIF_RDS);
+		break;
+	case PR_SPEC_DISABLE:
+		task_set_spec_ssb_disable(task);
+		update = !test_and_set_tsk_thread_flag(task, TIF_RDS);
+		break;
+	case PR_SPEC_FORCE_DISABLE:
+		task_set_spec_ssb_disable(task);
+		task_set_spec_ssb_force_disable(task);
+		update = !test_and_set_tsk_thread_flag(task, TIF_RDS);
+		break;
+	default:
+		return -ERANGE;
+	}
 
 	/*
 	 * If being set on non-current task, delay setting the CPU
 	 * mitigation until it is next scheduled.
 	 */
-	if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS))
+	if (task == current && update)
 		speculative_store_bypass_update();
 
 	return 0;
@@ -557,7 +573,9 @@ static int ssb_prctl_get(struct task_struct *task)
 	case SPEC_STORE_BYPASS_DISABLE:
 		return PR_SPEC_DISABLE;
 	case SPEC_STORE_BYPASS_PRCTL:
-		if (test_tsk_thread_flag(task, TIF_RDS))
+		if (task_spec_ssb_force_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ssb_disable(task))
 			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
 		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
 	default:
@@ -570,9 +588,6 @@ static int ssb_prctl_get(struct task_struct *task)
 int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 			     unsigned long ctrl)
 {
-	if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE)
-		return -ERANGE;
-
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
 		return ssb_prctl_set(task, ctrl);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index bb48358b99a3..31414787c971 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -341,6 +341,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 	case PR_SPEC_NOT_AFFECTED:
 		seq_printf(m, "not vulnerable");
 		break;
+	case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+		seq_printf(m, "thread force mitigated");
+		break;
 	case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
 		seq_printf(m, "thread mitigated");
 		break;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 90bea398e5e0..725498cc5d30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2167,6 +2167,8 @@ static inline void memalloc_noio_restore(unsigned int flags)
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
 #define PFA_SPREAD_PAGE  1      /* Spread page cache over cpuset */
 #define PFA_SPREAD_SLAB  2      /* Spread some slab caches over cpuset */
+#define PFA_SPEC_SSB_DISABLE		4	/* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE	5	/* Speculative Store Bypass force disabled*/
 
 
 #define TASK_PFA_TEST(name, func)					\
@@ -2190,6 +2192,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
 TASK_PFA_SET(SPREAD_SLAB, spread_slab)
 TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
 /*
  * task->jobctl flags
  */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 3b316be71c56..64776b72e1eb 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,5 +207,6 @@ struct prctl_mm_map {
 # define PR_SPEC_PRCTL			(1UL << 0)
 # define PR_SPEC_ENABLE			(1UL << 1)
 # define PR_SPEC_DISABLE		(1UL << 2)
+# define PR_SPEC_FORCE_DISABLE		(1UL << 3)
 
 #endif /* _LINUX_PRCTL_H */

From a08c3f484c34df1e3bec3c47818d570483bf67fa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:36:49 -0700
Subject: [PATCH 072/519] seccomp: Use PR_SPEC_FORCE_DISABLE

commit b849a812f7eb92e96d1c8239b06581b2cfd8b275 upstream

Use PR_SPEC_FORCE_DISABLE in seccomp() because seccomp does not allow to
widen restrictions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/seccomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bfb1ee845ba6..f33539f31bc0 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -226,7 +226,7 @@ static inline void spec_mitigate(struct task_struct *task,
 	int state = arch_prctl_spec_ctrl_get(task, which);
 
 	if (state > 0 && (state & PR_SPEC_PRCTL))
-		arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE);
+		arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE);
 }
 
 static inline void seccomp_assign_mode(struct task_struct *task,

From c463c0f037f2d83aea54415ed7c61deb0b90333b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 14 Jul 2018 02:36:57 -0700
Subject: [PATCH 073/519] seccomp: Add filter flag to opt-out of SSB mitigation

commit 00a02d0c502a06d15e07b857f8ff921e3e402675 upstream

If a seccomp user is not interested in Speculative Store Bypass mitigation
by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when
adding filters.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/seccomp.h                       |  3 +-
 include/uapi/linux/seccomp.h                  |  4 +-
 kernel/seccomp.c                              | 19 +++--
 tools/testing/selftests/seccomp/seccomp_bpf.c | 78 ++++++++++++++++++-
 4 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 2296e6b2f690..5a53d34bba26 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
-#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC	| \
+					 SECCOMP_FILTER_FLAG_SPEC_ALLOW)
 
 #ifdef CONFIG_SECCOMP
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..e4acb615792b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -15,7 +15,9 @@
 #define SECCOMP_SET_MODE_FILTER	1
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC	1
+#define SECCOMP_FILTER_FLAG_TSYNC	(1UL << 0)
+/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW	(1UL << 2)
 
 /*
  * All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index f33539f31bc0..4bb8a5a5d68a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -230,7 +230,8 @@ static inline void spec_mitigate(struct task_struct *task,
 }
 
 static inline void seccomp_assign_mode(struct task_struct *task,
-				       unsigned long seccomp_mode)
+				       unsigned long seccomp_mode,
+				       unsigned long flags)
 {
 	assert_spin_locked(&task->sighand->siglock);
 
@@ -240,8 +241,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 	 * filter) is set.
 	 */
 	smp_mb__before_atomic();
-	/* Assume seccomp processes want speculation flaw mitigation. */
-	spec_mitigate(task, PR_SPEC_STORE_BYPASS);
+	/* Assume default seccomp processes want spec flaw mitigation. */
+	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+		spec_mitigate(task, PR_SPEC_STORE_BYPASS);
 	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 
@@ -309,7 +311,7 @@ static inline pid_t seccomp_can_sync_threads(void)
  * without dropping the locks.
  *
  */
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
 {
 	struct task_struct *thread, *caller;
 
@@ -350,7 +352,8 @@ static inline void seccomp_sync_threads(void)
 		 * allow one thread to transition the other.
 		 */
 		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
-			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+					    flags);
 	}
 }
 
@@ -469,7 +472,7 @@ static long seccomp_attach_filter(unsigned int flags,
 
 	/* Now that the new filter is in place, synchronize to all threads. */
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-		seccomp_sync_threads();
+		seccomp_sync_threads(flags);
 
 	return 0;
 }
@@ -764,7 +767,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef TIF_NOTSC
 	disable_TSC();
 #endif
-	seccomp_assign_mode(current, seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode, 0);
 	ret = 0;
 
 out:
@@ -822,7 +825,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	/* Do not free the successfully attached filter. */
 	prepared = NULL;
 
-	seccomp_assign_mode(current, seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode, flags);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 29487e0437ad..b3f345433ec7 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1477,7 +1477,11 @@ TEST_F(TRACE_syscall, syscall_dropped)
 #endif
 
 #ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
 #endif
 
 #ifndef seccomp
@@ -1576,6 +1580,78 @@ TEST(seccomp_syscall_mode_lock)
 	}
 }
 
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_SPEC_ALLOW };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		int bits = 0;
+
+		flag = flags[i];
+		/* Make sure the flag is a single bit! */
+		while (flag) {
+			if (flag & 0x1)
+				bits ++;
+			flag >>= 1;
+		}
+		ASSERT_EQ(1, bits);
+		flag = flags[i];
+
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+
+		all_flags |= flag;
+	}
+
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
+
 TEST(TSYNC_first)
 {
 	struct sock_filter filter[] = {

From 9237a1b0828962191107e702cf56c88db9f9d455 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:37:05 -0700
Subject: [PATCH 074/519] seccomp: Move speculation migitation control to arch
 code

commit 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc upstream

The migitation control is simpler to implement in architecture code as it
avoids the extra function call to check the mode. Aside of that having an
explicit seccomp enabled mode in the architecture mitigations would require
even more workarounds.

Move it into architecture code and provide a weak function in the seccomp
code. Remove the 'which' argument as this allows the architecture to decide
which mitigations are relevant for seccomp.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 29 ++++++++++++++++++-----------
 include/linux/nospec.h     |  2 ++
 kernel/seccomp.c           | 15 ++-------------
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d6897caa8f47..b005ef721dc7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -567,6 +567,24 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 	return 0;
 }
 
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssb_prctl_set(task, ctrl);
+	default:
+		return -ENODEV;
+	}
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+	ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
 static int ssb_prctl_get(struct task_struct *task)
 {
 	switch (ssb_mode) {
@@ -585,17 +603,6 @@ static int ssb_prctl_get(struct task_struct *task)
 	}
 }
 
-int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
-			     unsigned long ctrl)
-{
-	switch (which) {
-	case PR_SPEC_STORE_BYPASS:
-		return ssb_prctl_set(task, ctrl);
-	default:
-		return -ENODEV;
-	}
-}
-
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 {
 	switch (which) {
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index a908c954484d..0c5ef54fd416 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
 int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 			     unsigned long ctrl);
+/* Speculation control for seccomp enforced mitigation */
+void arch_seccomp_spec_mitigate(struct task_struct *task);
 
 #endif /* _LINUX_NOSPEC_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 4bb8a5a5d68a..9a9203b15cde 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -216,18 +216,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 	return true;
 }
 
-/*
- * If a given speculation mitigation is opt-in (prctl()-controlled),
- * select it, by disabling speculation (enabling mitigation).
- */
-static inline void spec_mitigate(struct task_struct *task,
-				 unsigned long which)
-{
-	int state = arch_prctl_spec_ctrl_get(task, which);
-
-	if (state > 0 && (state & PR_SPEC_PRCTL))
-		arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE);
-}
+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
 
 static inline void seccomp_assign_mode(struct task_struct *task,
 				       unsigned long seccomp_mode,
@@ -243,7 +232,7 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 	smp_mb__before_atomic();
 	/* Assume default seccomp processes want spec flaw mitigation. */
 	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
-		spec_mitigate(task, PR_SPEC_STORE_BYPASS);
+		arch_seccomp_spec_mitigate(task);
 	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 

From afc6bf9131efc36d4ae8a003e8597119a2190661 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 14 Jul 2018 02:37:13 -0700
Subject: [PATCH 075/519] x86/speculation: Make "seccomp" the default mode for
 Speculative Store Bypass

commit f21b53b20c754021935ea43364dbf53778eeba32 upstream

Unless explicitly opted out of, anything running under seccomp will have
SSB mitigations enabled. Choosing the "prctl" mode will disable this.

[ tglx: Adjusted it to the new arch_seccomp_spec_mitigate() mechanism ]

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt  | 26 ++++++++++++++--------
 arch/x86/include/asm/nospec-branch.h |  1 +
 arch/x86/kernel/cpu/bugs.c           | 32 ++++++++++++++++++++--------
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 80202debbdbe..3fd53e193b7f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3647,19 +3647,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			This parameter controls whether the Speculative Store
 			Bypass optimization is used.
 
-			on     - Unconditionally disable Speculative Store Bypass
-			off    - Unconditionally enable Speculative Store Bypass
-			auto   - Kernel detects whether the CPU model contains an
-				 implementation of Speculative Store Bypass and
-				 picks the most appropriate mitigation.
-			prctl  - Control Speculative Store Bypass per thread
-				 via prctl. Speculative Store Bypass is enabled
-				 for a process by default. The state of the control
-				 is inherited on fork.
+			on      - Unconditionally disable Speculative Store Bypass
+			off     - Unconditionally enable Speculative Store Bypass
+			auto    - Kernel detects whether the CPU model contains an
+				  implementation of Speculative Store Bypass and
+				  picks the most appropriate mitigation. If the
+				  CPU is not vulnerable, "off" is selected. If the
+				  CPU is vulnerable the default mitigation is
+				  architecture and Kconfig dependent. See below.
+			prctl   - Control Speculative Store Bypass per thread
+				  via prctl. Speculative Store Bypass is enabled
+				  for a process by default. The state of the control
+				  is inherited on fork.
+			seccomp - Same as "prctl" above, but all seccomp threads
+				  will disable SSB unless they explicitly opt out.
 
 			Not specifying this option is equivalent to
 			spec_store_bypass_disable=auto.
 
+			Default mitigations:
+			X86:	If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 155d955ab801..930c15941157 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -188,6 +188,7 @@ enum ssb_mitigation {
 	SPEC_STORE_BYPASS_NONE,
 	SPEC_STORE_BYPASS_DISABLE,
 	SPEC_STORE_BYPASS_PRCTL,
+	SPEC_STORE_BYPASS_SECCOMP,
 };
 
 extern char __indirect_thunk_start[];
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b005ef721dc7..6fd3fcf680bf 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -414,22 +414,25 @@ enum ssb_mitigation_cmd {
 	SPEC_STORE_BYPASS_CMD_AUTO,
 	SPEC_STORE_BYPASS_CMD_ON,
 	SPEC_STORE_BYPASS_CMD_PRCTL,
+	SPEC_STORE_BYPASS_CMD_SECCOMP,
 };
 
 static const char *ssb_strings[] = {
 	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
 	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
-	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl"
+	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl",
+	[SPEC_STORE_BYPASS_SECCOMP]	= "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
 };
 
 static const struct {
 	const char *option;
 	enum ssb_mitigation_cmd cmd;
 } ssb_mitigation_options[] = {
-	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },  /* Platform decides */
-	{ "on",		SPEC_STORE_BYPASS_CMD_ON },    /* Disable Speculative Store Bypass */
-	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },  /* Don't touch Speculative Store Bypass */
-	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
+	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
+	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
+	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
+	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL },   /* Disable Speculative Store Bypass via prctl */
+	{ "seccomp",	SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
 };
 
 static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
@@ -479,8 +482,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 
 	switch (cmd) {
 	case SPEC_STORE_BYPASS_CMD_AUTO:
-		/* Choose prctl as the default mode */
-		mode = SPEC_STORE_BYPASS_PRCTL;
+	case SPEC_STORE_BYPASS_CMD_SECCOMP:
+		/*
+		 * Choose prctl+seccomp as the default mode if seccomp is
+		 * enabled.
+		 */
+		if (IS_ENABLED(CONFIG_SECCOMP))
+			mode = SPEC_STORE_BYPASS_SECCOMP;
+		else
+			mode = SPEC_STORE_BYPASS_PRCTL;
 		break;
 	case SPEC_STORE_BYPASS_CMD_ON:
 		mode = SPEC_STORE_BYPASS_DISABLE;
@@ -528,12 +538,14 @@ static void ssb_select_mitigation()
 }
 
 #undef pr_fmt
+#define pr_fmt(fmt)     "Speculation prctl: " fmt
 
 static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
 	bool update;
 
-	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL)
+	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+	    ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
 		return -ENXIO;
 
 	switch (ctrl) {
@@ -581,7 +593,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 #ifdef CONFIG_SECCOMP
 void arch_seccomp_spec_mitigate(struct task_struct *task)
 {
-	ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
 }
 #endif
 
@@ -590,6 +603,7 @@ static int ssb_prctl_get(struct task_struct *task)
 	switch (ssb_mode) {
 	case SPEC_STORE_BYPASS_DISABLE:
 		return PR_SPEC_DISABLE;
+	case SPEC_STORE_BYPASS_SECCOMP:
 	case SPEC_STORE_BYPASS_PRCTL:
 		if (task_spec_ssb_force_disable(task))
 			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;

From 6e2119e4b8767a6c3a415875ad09596ada00755c Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:37:21 -0700
Subject: [PATCH 076/519] x86/bugs: Rename _RDS to _SSBD

commit 9f65fb29374ee37856dbad847b4e121aab72b510 upstream

Intel collateral will reference the SSB mitigation bit in IA32_SPEC_CTL[2]
as SSBD (Speculative Store Bypass Disable).

Hence changing it.

It is unclear yet what the MSR_IA32_ARCH_CAPABILITIES (0x10a) Bit(4) name
is going to be. Following the rename it would be SSBD_NO but that rolls out
to Speculative Store Bypass Disable No.

Also fixed the missing space in X86_FEATURE_AMD_SSBD.

[ tglx: Fixup x86_amd_rds_enable() and rds_tif_to_amd_ls_cfg() as well ]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  4 ++--
 arch/x86/include/asm/msr-index.h   | 10 ++++-----
 arch/x86/include/asm/spec-ctrl.h   | 12 +++++-----
 arch/x86/include/asm/thread_info.h |  6 ++---
 arch/x86/kernel/cpu/amd.c          | 14 ++++++------
 arch/x86/kernel/cpu/bugs.c         | 36 +++++++++++++++---------------
 arch/x86/kernel/cpu/common.c       |  2 +-
 arch/x86/kernel/cpu/intel.c        |  2 +-
 arch/x86/kernel/process.c          |  8 +++----
 9 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b7cdd1c05132..97926be21a7f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,7 @@
 #define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
 #define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_AMD_RDS	(7*32+24)  /* "" AMD RDS implementation */
+#define X86_FEATURE_AMD_SSBD	(7*32+24)  /* "" AMD SSBD implementation */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -299,7 +299,7 @@
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_RDS			(18*32+31) /* Reduced Data Speculation */
+#define X86_FEATURE_SSBD		(18*32+31) /* Speculative Store Bypass Disable */
 
 /*
  * BUG word(s)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 883cf0d6b1f9..2ea2ff1a81e8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -35,8 +35,8 @@
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
 #define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
 #define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
-#define SPEC_CTRL_RDS_SHIFT		2	   /* Reduced Data Speculation bit */
-#define SPEC_CTRL_RDS			(1 << SPEC_CTRL_RDS_SHIFT)   /* Reduced Data Speculation */
+#define SPEC_CTRL_SSBD_SHIFT		2	   /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD			(1 << SPEC_CTRL_SSBD_SHIFT)   /* Speculative Store Bypass Disable */
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
@@ -58,10 +58,10 @@
 #define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
 #define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
 #define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
-#define ARCH_CAP_RDS_NO			(1 << 4)   /*
+#define ARCH_CAP_SSBD_NO		(1 << 4)   /*
 						    * Not susceptible to Speculative Store Bypass
-						    * attack, so no Reduced Data Speculation control
-						    * required.
+						    * attack, so no Speculative Store Bypass
+						    * control required.
 						    */
 
 #define MSR_IA32_BBL_CR_CTL		0x00000119
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 45ef00ad5105..dc21209790bf 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -17,20 +17,20 @@ extern void x86_spec_ctrl_restore_host(u64);
 
 /* AMD specific Speculative Store Bypass MSR data */
 extern u64 x86_amd_ls_cfg_base;
-extern u64 x86_amd_ls_cfg_rds_mask;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
 
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
 
-static inline u64 rds_tif_to_spec_ctrl(u64 tifn)
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
 {
-	BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT);
-	return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT);
+	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+	return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
 }
 
-static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn)
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
 {
-	return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL;
+	return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
 }
 
 extern void speculative_store_bypass_update(void);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 36a49b4ba4b5..a96e88b243ef 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,7 +92,7 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_RDS			5	/* Reduced data speculation */
+#define TIF_SSBD		5	/* Reduced data speculation */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -117,7 +117,7 @@ struct thread_info {
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_RDS		(1 << TIF_RDS)
+#define _TIF_SSBD		(1 << TIF_SSBD)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
@@ -149,7 +149,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS)
+	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 14e984916f2f..bd0edb2a21fc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -532,12 +532,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 		}
 		/*
 		 * Try to cache the base value so further operations can
-		 * avoid RMW. If that faults, do not enable RDS.
+		 * avoid RMW. If that faults, do not enable SSBD.
 		 */
 		if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
-			setup_force_cpu_cap(X86_FEATURE_RDS);
-			setup_force_cpu_cap(X86_FEATURE_AMD_RDS);
-			x86_amd_ls_cfg_rds_mask = 1ULL << bit;
+			setup_force_cpu_cap(X86_FEATURE_SSBD);
+			setup_force_cpu_cap(X86_FEATURE_AMD_SSBD);
+			x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
 		}
 	}
 }
@@ -816,9 +816,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_XENPV))
 		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 
-	if (boot_cpu_has(X86_FEATURE_AMD_RDS)) {
-		set_cpu_cap(c, X86_FEATURE_RDS);
-		set_cpu_cap(c, X86_FEATURE_AMD_RDS);
+	if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) {
+		set_cpu_cap(c, X86_FEATURE_SSBD);
+		set_cpu_cap(c, X86_FEATURE_AMD_SSBD);
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 6fd3fcf680bf..812e92a49216 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -44,10 +44,10 @@ static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS;
 
 /*
  * AMD specific MSR info for Speculative Store Bypass control.
- * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu().
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
  */
 u64 x86_amd_ls_cfg_base;
-u64 x86_amd_ls_cfg_rds_mask;
+u64 x86_amd_ls_cfg_ssbd_mask;
 
 void __init check_bugs(void)
 {
@@ -144,7 +144,7 @@ u64 x86_spec_ctrl_get_default(void)
 	u64 msrval = x86_spec_ctrl_base;
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags);
+		msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 	return msrval;
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
@@ -157,7 +157,7 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
 		return;
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		host |= rds_tif_to_spec_ctrl(current_thread_info()->flags);
+		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 
 	if (host != guest_spec_ctrl)
 		wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
@@ -172,18 +172,18 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
 		return;
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		host |= rds_tif_to_spec_ctrl(current_thread_info()->flags);
+		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 
 	if (host != guest_spec_ctrl)
 		wrmsrl(MSR_IA32_SPEC_CTRL, host);
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
 
-static void x86_amd_rds_enable(void)
+static void x86_amd_ssb_disable(void)
 {
-	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask;
+	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
 
-	if (boot_cpu_has(X86_FEATURE_AMD_RDS))
+	if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
@@ -471,7 +471,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 	enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
 	enum ssb_mitigation_cmd cmd;
 
-	if (!boot_cpu_has(X86_FEATURE_RDS))
+	if (!boot_cpu_has(X86_FEATURE_SSBD))
 		return mode;
 
 	cmd = ssb_parse_cmdline();
@@ -505,7 +505,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 	/*
 	 * We have three CPU feature flags that are in play here:
 	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
-	 *  - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass
+	 *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
 	 *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
 	 */
 	if (mode == SPEC_STORE_BYPASS_DISABLE) {
@@ -516,12 +516,12 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
 		 */
 		switch (boot_cpu_data.x86_vendor) {
 		case X86_VENDOR_INTEL:
-			x86_spec_ctrl_base |= SPEC_CTRL_RDS;
-			x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS;
-			x86_spec_ctrl_set(SPEC_CTRL_RDS);
+			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+			x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD;
+			x86_spec_ctrl_set(SPEC_CTRL_SSBD);
 			break;
 		case X86_VENDOR_AMD:
-			x86_amd_rds_enable();
+			x86_amd_ssb_disable();
 			break;
 		}
 	}
@@ -554,16 +554,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (task_spec_ssb_force_disable(task))
 			return -EPERM;
 		task_clear_spec_ssb_disable(task);
-		update = test_and_clear_tsk_thread_flag(task, TIF_RDS);
+		update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
 		break;
 	case PR_SPEC_DISABLE:
 		task_set_spec_ssb_disable(task);
-		update = !test_and_set_tsk_thread_flag(task, TIF_RDS);
+		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
 		break;
 	case PR_SPEC_FORCE_DISABLE:
 		task_set_spec_ssb_disable(task);
 		task_set_spec_ssb_force_disable(task);
-		update = !test_and_set_tsk_thread_flag(task, TIF_RDS);
+		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
 		break;
 	default:
 		return -ERANGE;
@@ -633,7 +633,7 @@ void x86_spec_ctrl_setup_ap(void)
 		x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
 
 	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
-		x86_amd_rds_enable();
+		x86_amd_ssb_disable();
 }
 
 #ifdef CONFIG_SYSFS
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7405c8653f7a..6f3a5d74acc8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -867,7 +867,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
 
 	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
-	   !(ia32_cap & ARCH_CAP_RDS_NO))
+	   !(ia32_cap & ARCH_CAP_SSBD_NO))
 		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
 	if (x86_match_cpu(cpu_no_speculation))
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ac25d1e5e8e8..a34e35731be4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -119,7 +119,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_STIBP);
 		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
 		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
-		setup_clear_cpu_cap(X86_FEATURE_RDS);
+		setup_clear_cpu_cap(X86_FEATURE_SSBD);
 	}
 
 	/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9689e92e72dd..57d4ba250c6a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -203,11 +203,11 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn
 {
 	u64 msr;
 
-	if (static_cpu_has(X86_FEATURE_AMD_RDS)) {
-		msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn);
+	if (static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+		msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
 		wrmsrl(MSR_AMD64_LS_CFG, msr);
 	} else {
-		msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn);
+		msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
 		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
 	}
 }
@@ -246,7 +246,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	if ((tifp ^ tifn) & _TIF_NOTSC)
 		cr4_toggle_bits(X86_CR4_TSD);
 
-	if ((tifp ^ tifn) & _TIF_RDS)
+	if ((tifp ^ tifn) & _TIF_SSBD)
 		__speculative_store_bypass_update(tifn);
 }
 

From 765897c6486de605eae3f94f77f2c800c9a2a254 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:37:29 -0700
Subject: [PATCH 077/519] proc: Use underscores for SSBD in 'status'

commit e96f46ee8587607a828f783daa6eb5b44d25004d upstream

The style for the 'status' file is CamelCase or this. _.

Fixes: fae1fa0fc ("proc: Provide details on speculation flaw mitigations")
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/array.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 31414787c971..cb71cbae606d 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -333,7 +333,7 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 #ifdef CONFIG_SECCOMP
 	seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
 #endif
-	seq_printf(m, "\nSpeculation Store Bypass:\t");
+	seq_printf(m, "\nSpeculation_Store_Bypass:\t");
 	switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
 	case -EINVAL:
 		seq_printf(m, "unknown");

From e5eea0486470acbe7aa20a0533543c47c942ec93 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 14 Jul 2018 02:37:37 -0700
Subject: [PATCH 078/519] Documentation/spec_ctrl: Do some minor cleanups

commit dd0792699c4058e63c0715d9a7c2d40226fcdddc upstream

Fix some typos, improve formulations, end sentences with a fullstop.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/spec_ctrl.txt | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt
index 1b3690d30943..32f3d55c54b7 100644
--- a/Documentation/spec_ctrl.txt
+++ b/Documentation/spec_ctrl.txt
@@ -2,13 +2,13 @@
 Speculation Control
 ===================
 
-Quite some CPUs have speculation related misfeatures which are in fact
-vulnerabilites causing data leaks in various forms even accross privilege
-domains.
+Quite some CPUs have speculation-related misfeatures which are in
+fact vulnerabilities causing data leaks in various forms even across
+privilege domains.
 
 The kernel provides mitigation for such vulnerabilities in various
-forms. Some of these mitigations are compile time configurable and some on
-the kernel command line.
+forms. Some of these mitigations are compile-time configurable and some
+can be supplied on the kernel command line.
 
 There is also a class of mitigations which are very expensive, but they can
 be restricted to a certain set of processes or tasks in controlled
@@ -32,18 +32,18 @@ the following meaning:
 Bit  Define                Description
 ==== ===================== ===================================================
 0    PR_SPEC_PRCTL         Mitigation can be controlled per task by
-                           PR_SET_SPECULATION_CTRL
+                           PR_SET_SPECULATION_CTRL.
 1    PR_SPEC_ENABLE        The speculation feature is enabled, mitigation is
-                           disabled
+                           disabled.
 2    PR_SPEC_DISABLE       The speculation feature is disabled, mitigation is
-                           enabled
+                           enabled.
 3    PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
                            subsequent prctl(..., PR_SPEC_ENABLE) will fail.
 ==== ===================== ===================================================
 
 If all bits are 0 the CPU is not affected by the speculation misfeature.
 
-If PR_SPEC_PRCTL is set, then the per task control of the mitigation is
+If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
 available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
 misfeature will fail.
 
@@ -61,9 +61,9 @@ Common error codes
 Value   Meaning
 ======= =================================================================
 EINVAL  The prctl is not implemented by the architecture or unused
-        prctl(2) arguments are not 0
+        prctl(2) arguments are not 0.
 
-ENODEV  arg2 is selecting a not supported speculation misfeature
+ENODEV  arg2 is selecting a not supported speculation misfeature.
 ======= =================================================================
 
 PR_SET_SPECULATION_CTRL error codes
@@ -74,7 +74,7 @@ Value   Meaning
 0       Success
 
 ERANGE  arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
-        PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE
+        PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
 
 ENXIO   Control of the selected speculation misfeature is not possible.
         See PR_GET_SPECULATION_CTRL.

From 631474e1cee0fbc0f346664aea5ee5b1c3600649 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sat, 14 Jul 2018 02:37:45 -0700
Subject: [PATCH 079/519] x86/bugs: Fix __ssb_select_mitigation() return type

commit d66d8ff3d21667b41eddbe86b35ab411e40d8c5f upstream

__ssb_select_mitigation() returns one of the members of enum ssb_mitigation,
not ssb_mitigation_cmd; fix the prototype to reflect that.

Fixes: 24f7fc83b9204 ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 812e92a49216..5b58b76254a2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -466,7 +466,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
 	return cmd;
 }
 
-static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
+static enum ssb_mitigation __init __ssb_select_mitigation(void)
 {
 	enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
 	enum ssb_mitigation_cmd cmd;

From 103b28d8a271c1d650eb5b09bd7a53d8915b51d6 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sat, 14 Jul 2018 02:37:53 -0700
Subject: [PATCH 080/519] x86/bugs: Make cpu_show_common() static

commit 7bb4d366cba992904bffa4820d24e70a3de93e76 upstream

cpu_show_common() is not used outside of arch/x86/kernel/cpu/bugs.c, so
make it static.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5b58b76254a2..512be6852395 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -638,7 +638,7 @@ void x86_spec_ctrl_setup_ap(void)
 
 #ifdef CONFIG_SYSFS
 
-ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			char *buf, unsigned int bug)
 {
 	if (!boot_cpu_has_bug(bug))

From 95bef2217ece77c345e627eba9cd2e85ada8eeb2 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:38:01 -0700
Subject: [PATCH 081/519] x86/bugs: Fix the parameters alignment and missing
 void

commit ffed645e3be0e32f8e9ab068d257aee8d0fe8eec upstream

Fixes: 7bb4d366c ("x86/bugs: Make cpu_show_common() static")
Fixes: 24f7fc83b ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation")
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 512be6852395..84de0fc30ea9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -529,7 +529,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 	return mode;
 }
 
-static void ssb_select_mitigation()
+static void ssb_select_mitigation(void)
 {
 	ssb_mode = __ssb_select_mitigation();
 
@@ -639,7 +639,7 @@ void x86_spec_ctrl_setup_ap(void)
 #ifdef CONFIG_SYSFS
 
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
-			char *buf, unsigned int bug)
+			       char *buf, unsigned int bug)
 {
 	if (!boot_cpu_has_bug(bug))
 		return sprintf(buf, "Not affected\n");

From 714f18858ceda6f2b8335686f1f019560fe89283 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Sat, 14 Jul 2018 02:38:08 -0700
Subject: [PATCH 082/519] x86/cpu: Make alternative_msr_write work for 32-bit
 code

commit 5f2b745f5e1304f438f9b2cd03ebc8120b6e0d3b upstream

Cast val and (val >> 32) to (u32), so that they fit in a
general-purpose register in both 32-bit and 64-bit code.

[ tglx: Made it u32 instead of uintptr_t ]

Fixes: c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload")
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 930c15941157..640c11b25913 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -219,8 +219,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
 	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
 		: : "c" (msr),
-		    "a" (val),
-		    "d" (val >> 32),
+		    "a" ((u32)val),
+		    "d" ((u32)(val >> 32)),
 		    [feature] "i" (feature)
 		: "memory");
 }

From 4f4a2c70cf2ecd17ef3899c754fee30caa343286 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 14 Jul 2018 02:38:17 -0700
Subject: [PATCH 083/519] x86/speculation: Use synthetic bits for
 IBRS/IBPB/STIBP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit e7c587da125291db39ddf1f49b18e5970adbac17 upstream

Intel and AMD have different CPUID bits hence for those use synthetic bits
which get set on the respective vendor's in init_speculation_control(). So
that debacles like what the commit message of

  c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload")

talks about don't happen anymore.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Jörg Otte <jrg.otte@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Link: https://lkml.kernel.org/r/20180504161815.GG9257@pd.tnic
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 12 ++++++++----
 arch/x86/kernel/cpu/common.c       | 14 ++++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 97926be21a7f..9f64d10adeba 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,10 @@
 #define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
 #define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_AMD_SSBD	(7*32+24)  /* "" AMD SSBD implementation */
+#define X86_FEATURE_AMD_SSBD	( 7*32+24) /* "" AMD SSBD implementation */
+#define X86_FEATURE_IBRS	( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -256,9 +259,9 @@
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-#define X86_FEATURE_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
@@ -293,6 +296,7 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6f3a5d74acc8..f2b579ff104c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -683,17 +683,23 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
 	 * and they also have a different bit for STIBP support. Also,
 	 * a hypervisor might have set the individual AMD bits even on
 	 * Intel CPUs, for finer-grained selection of what's available.
-	 *
-	 * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
-	 * features, which are visible in /proc/cpuinfo and used by the
-	 * kernel. So set those accordingly from the Intel bits.
 	 */
 	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
 		set_cpu_cap(c, X86_FEATURE_IBRS);
 		set_cpu_cap(c, X86_FEATURE_IBPB);
 	}
+
 	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
 		set_cpu_cap(c, X86_FEATURE_STIBP);
+
+	if (cpu_has(c, X86_FEATURE_AMD_IBRS))
+		set_cpu_cap(c, X86_FEATURE_IBRS);
+
+	if (cpu_has(c, X86_FEATURE_AMD_IBPB))
+		set_cpu_cap(c, X86_FEATURE_IBPB);
+
+	if (cpu_has(c, X86_FEATURE_AMD_STIBP))
+		set_cpu_cap(c, X86_FEATURE_STIBP);
 }
 
 void get_cpu_cap(struct cpuinfo_x86 *c)

From e4bb3382cbe9173e7f6e3a13fd1cb39c3a72671f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:38:25 -0700
Subject: [PATCH 084/519] x86/cpufeatures: Disentangle MSR_SPEC_CTRL
 enumeration from IBRS

commit 7eb8956a7fec3c1f0abc2a5517dada99ccc8a961 upstream

The availability of the SPEC_CTRL MSR is enumerated by a CPUID bit on
Intel and implied by IBRS or STIBP support on AMD. That's just confusing
and in case an AMD CPU has IBRS not supported because the underlying
problem has been fixed but has another bit valid in the SPEC_CTRL MSR,
the thing falls apart.

Add a synthetic feature bit X86_FEATURE_MSR_SPEC_CTRL to denote the
availability on both Intel and AMD.

While at it replace the boot_cpu_has() checks with static_cpu_has() where
possible. This prevents late microcode loading from exposing SPEC_CTRL, but
late loading is already very limited as it does not reevaluate the
mitigation options and other bits and pieces. Having static_cpu_has() is
the simplest and least fragile solution.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  3 +++
 arch/x86/kernel/cpu/bugs.c         | 18 +++++++++++-------
 arch/x86/kernel/cpu/common.c       |  9 +++++++--
 arch/x86/kernel/cpu/intel.c        |  1 +
 4 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 9f64d10adeba..dd04cd739553 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -198,6 +198,9 @@
 
 #define X86_FEATURE_RETPOLINE	( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 84de0fc30ea9..e23e2899d783 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -63,7 +63,7 @@ void __init check_bugs(void)
 	 * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
 	 * init code as it is not enumerated and depends on the family.
 	 */
-	if (boot_cpu_has(X86_FEATURE_IBRS))
+	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 
 	/* Select the proper spectre mitigation before patching alternatives */
@@ -143,7 +143,7 @@ u64 x86_spec_ctrl_get_default(void)
 {
 	u64 msrval = x86_spec_ctrl_base;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
 		msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 	return msrval;
 }
@@ -153,10 +153,12 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
 {
 	u64 host = x86_spec_ctrl_base;
 
-	if (!boot_cpu_has(X86_FEATURE_IBRS))
+	/* Is MSR_SPEC_CTRL implemented ? */
+	if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		return;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+	/* Intel controls SSB in MSR_SPEC_CTRL */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
 		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 
 	if (host != guest_spec_ctrl)
@@ -168,10 +170,12 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
 {
 	u64 host = x86_spec_ctrl_base;
 
-	if (!boot_cpu_has(X86_FEATURE_IBRS))
+	/* Is MSR_SPEC_CTRL implemented ? */
+	if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		return;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+	/* Intel controls SSB in MSR_SPEC_CTRL */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
 		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 
 	if (host != guest_spec_ctrl)
@@ -629,7 +633,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 
 void x86_spec_ctrl_setup_ap(void)
 {
-	if (boot_cpu_has(X86_FEATURE_IBRS))
+	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
 
 	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f2b579ff104c..1f70ff15eb62 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -687,19 +687,24 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
 		set_cpu_cap(c, X86_FEATURE_IBRS);
 		set_cpu_cap(c, X86_FEATURE_IBPB);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
 	}
 
 	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
 		set_cpu_cap(c, X86_FEATURE_STIBP);
 
-	if (cpu_has(c, X86_FEATURE_AMD_IBRS))
+	if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
 		set_cpu_cap(c, X86_FEATURE_IBRS);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+	}
 
 	if (cpu_has(c, X86_FEATURE_AMD_IBPB))
 		set_cpu_cap(c, X86_FEATURE_IBPB);
 
-	if (cpu_has(c, X86_FEATURE_AMD_STIBP))
+	if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
 		set_cpu_cap(c, X86_FEATURE_STIBP);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+	}
 }
 
 void get_cpu_cap(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a34e35731be4..9a84e75cbec5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -118,6 +118,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_IBPB);
 		setup_clear_cpu_cap(X86_FEATURE_STIBP);
 		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+		setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
 		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
 		setup_clear_cpu_cap(X86_FEATURE_SSBD);
 	}

From 11a0b92f6d57853550f927fe91190b745a5ab945 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:38:33 -0700
Subject: [PATCH 085/519] x86/cpufeatures: Disentangle SSBD enumeration

commit 52817587e706686fcdb27f14c1b000c92f266c96 upstream

The SSBD enumeration is similarly to the other bits magically shared
between Intel and AMD though the mechanisms are different.

Make X86_FEATURE_SSBD synthetic and set it depending on the vendor specific
features or family dependent setup.

Change the Intel bit to X86_FEATURE_SPEC_CTRL_SSBD to denote that SSBD is
controlled via MSR_SPEC_CTRL and fix up the usage sites.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  6 ++++--
 arch/x86/kernel/cpu/amd.c          |  7 +------
 arch/x86/kernel/cpu/bugs.c         | 10 +++++-----
 arch/x86/kernel/cpu/common.c       |  3 +++
 arch/x86/kernel/cpu/intel.c        |  1 +
 arch/x86/kernel/process.c          |  2 +-
 6 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dd04cd739553..7dd1bba49172 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -200,6 +200,7 @@
 #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 
 #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD	( 7*32+17) /* Speculative Store Bypass Disable */
 
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
@@ -207,7 +208,8 @@
 #define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
 #define X86_FEATURE_USE_IBRS_FW	( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_AMD_SSBD	( 7*32+24) /* "" AMD SSBD implementation */
+#define X86_FEATURE_LS_CFG_SSBD	( 7*32+24) /* "" AMD SSBD implementation */
+
 #define X86_FEATURE_IBRS	( 7*32+25) /* Indirect Branch Restricted Speculation */
 #define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
@@ -306,7 +308,7 @@
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_SSBD		(18*32+31) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
 /*
  * BUG word(s)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bd0edb2a21fc..a97fd6730f12 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -535,8 +535,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 		 * avoid RMW. If that faults, do not enable SSBD.
 		 */
 		if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+			setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
 			setup_force_cpu_cap(X86_FEATURE_SSBD);
-			setup_force_cpu_cap(X86_FEATURE_AMD_SSBD);
 			x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
 		}
 	}
@@ -815,11 +815,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
 	if (!cpu_has(c, X86_FEATURE_XENPV))
 		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
-
-	if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) {
-		set_cpu_cap(c, X86_FEATURE_SSBD);
-		set_cpu_cap(c, X86_FEATURE_AMD_SSBD);
-	}
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e23e2899d783..9be729267794 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -157,8 +157,8 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
 	if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		return;
 
-	/* Intel controls SSB in MSR_SPEC_CTRL */
-	if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
+	/* SSBD controlled in MSR_SPEC_CTRL */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
 		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 
 	if (host != guest_spec_ctrl)
@@ -174,8 +174,8 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
 	if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		return;
 
-	/* Intel controls SSB in MSR_SPEC_CTRL */
-	if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
+	/* SSBD controlled in MSR_SPEC_CTRL */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
 		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
 
 	if (host != guest_spec_ctrl)
@@ -187,7 +187,7 @@ static void x86_amd_ssb_disable(void)
 {
 	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
 
-	if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
+	if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1f70ff15eb62..10977236c6eb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -693,6 +693,9 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
 		set_cpu_cap(c, X86_FEATURE_STIBP);
 
+	if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD))
+		set_cpu_cap(c, X86_FEATURE_SSBD);
+
 	if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
 		set_cpu_cap(c, X86_FEATURE_IBRS);
 		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9a84e75cbec5..4dce22d3cb06 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -121,6 +121,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
 		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
 		setup_clear_cpu_cap(X86_FEATURE_SSBD);
+		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
 	}
 
 	/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d4ba250c6a..8cefbd2bb348 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -203,7 +203,7 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn
 {
 	u64 msr;
 
-	if (static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+	if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
 		msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
 		wrmsrl(MSR_AMD64_LS_CFG, msr);
 	} else {

From 4ba461d426490b6ed7e8298c4d3b7a13aa5d2686 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 14 Jul 2018 02:38:41 -0700
Subject: [PATCH 086/519] x86/cpu/AMD: Fix erratum 1076 (CPB bit)

commit f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 upstream

CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do
support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sherry Hurwitz <sherry.hurwitz@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/amd.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a97fd6730f12..87f4a0d73c5e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -713,6 +713,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 	}
 }
 
+static void init_amd_zn(struct cpuinfo_x86 *c)
+{
+	/*
+	 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
+	 * all up to and including B1.
+	 */
+	if (c->x86_model <= 1 && c->x86_mask <= 1)
+		set_cpu_cap(c, X86_FEATURE_CPB);
+}
+
 static void init_amd(struct cpuinfo_x86 *c)
 {
 	u32 dummy;
@@ -743,6 +753,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x10: init_amd_gh(c); break;
 	case 0x12: init_amd_ln(c); break;
 	case 0x15: init_amd_bd(c); break;
+	case 0x17: init_amd_zn(c); break;
 	}
 
 	/* Enable workaround for FXSAVE leak */

From 21757fc8bafd50ce477fff2bcec6faec27c5548d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:38:50 -0700
Subject: [PATCH 087/519] x86/cpufeatures: Add FEATURE_ZEN

commit d1035d971829dcf80e8686ccde26f94b0a069472 upstream

Add a ZEN feature bit so family-dependent static_cpu_has() optimizations
can be built for ZEN.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 2 ++
 arch/x86/kernel/cpu/amd.c          | 1 +
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 7dd1bba49172..8ae91326cc47 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,8 @@
 #define X86_FEATURE_IBRS	( 7*32+25) /* Indirect Branch Restricted Speculation */
 #define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN		( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 87f4a0d73c5e..9f6151884249 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -715,6 +715,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 
 static void init_amd_zn(struct cpuinfo_x86 *c)
 {
+	set_cpu_cap(c, X86_FEATURE_ZEN);
 	/*
 	 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
 	 * all up to and including B1.

From ea8efcd4415f70766acb4bb9553fad855eea48e1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:38:58 -0700
Subject: [PATCH 088/519] x86/speculation: Handle HT correctly on AMD

commit 1f50ddb4f4189243c05926b842dc1a0332195f31 upstream

The AMD64_LS_CFG MSR is a per core MSR on Family 17H CPUs. That means when
hyperthreading is enabled the SSBD bit toggle needs to take both cores into
account. Otherwise the following situation can happen:

CPU0		CPU1

disable SSB
		disable SSB
		enable  SSB <- Enables it for the Core, i.e. for CPU0 as well

So after the SSB enable on CPU1 the task on CPU0 runs with SSB enabled
again.

On Intel the SSBD control is per core as well, but the synchronization
logic is implemented behind the per thread SPEC_CTRL MSR. It works like
this:

  CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL

i.e. if one of the threads enables a mitigation then this affects both and
the mitigation is only disabled in the core when both threads disabled it.

Add the necessary synchronization logic for AMD family 17H. Unfortunately
that requires a spinlock to serialize the access to the MSR, but the locks
are only shared between siblings.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/spec-ctrl.h |   6 ++
 arch/x86/kernel/process.c        | 131 ++++++++++++++++++++++++++++---
 arch/x86/kernel/smpboot.c        |   5 ++
 3 files changed, 133 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index dc21209790bf..0cb49c4564b0 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -33,6 +33,12 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
 	return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
 }
 
+#ifdef CONFIG_SMP
+extern void speculative_store_bypass_ht_init(void);
+#else
+static inline void speculative_store_bypass_ht_init(void) { }
+#endif
+
 extern void speculative_store_bypass_update(void);
 
 #endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8cefbd2bb348..0842869db312 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -199,22 +199,135 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
 	}
 }
 
+#ifdef CONFIG_SMP
+
+struct ssb_state {
+	struct ssb_state	*shared_state;
+	raw_spinlock_t		lock;
+	unsigned int		disable_state;
+	unsigned long		local_state;
+};
+
+#define LSTATE_SSB	0
+
+static DEFINE_PER_CPU(struct ssb_state, ssb_state);
+
+void speculative_store_bypass_ht_init(void)
+{
+	struct ssb_state *st = this_cpu_ptr(&ssb_state);
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	st->local_state = 0;
+
+	/*
+	 * Shared state setup happens once on the first bringup
+	 * of the CPU. It's not destroyed on CPU hotunplug.
+	 */
+	if (st->shared_state)
+		return;
+
+	raw_spin_lock_init(&st->lock);
+
+	/*
+	 * Go over HT siblings and check whether one of them has set up the
+	 * shared state pointer already.
+	 */
+	for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		if (!per_cpu(ssb_state, cpu).shared_state)
+			continue;
+
+		/* Link it to the state of the sibling: */
+		st->shared_state = per_cpu(ssb_state, cpu).shared_state;
+		return;
+	}
+
+	/*
+	 * First HT sibling to come up on the core.  Link shared state of
+	 * the first HT sibling to itself. The siblings on the same core
+	 * which come up later will see the shared state pointer and link
+	 * themself to the state of this CPU.
+	 */
+	st->shared_state = st;
+}
+
+/*
+ * Logic is: First HT sibling enables SSBD for both siblings in the core
+ * and last sibling to disable it, disables it for the whole core. This how
+ * MSR_SPEC_CTRL works in "hardware":
+ *
+ *  CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
+ */
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+	struct ssb_state *st = this_cpu_ptr(&ssb_state);
+	u64 msr = x86_amd_ls_cfg_base;
+
+	if (!static_cpu_has(X86_FEATURE_ZEN)) {
+		msr |= ssbd_tif_to_amd_ls_cfg(tifn);
+		wrmsrl(MSR_AMD64_LS_CFG, msr);
+		return;
+	}
+
+	if (tifn & _TIF_SSBD) {
+		/*
+		 * Since this can race with prctl(), block reentry on the
+		 * same CPU.
+		 */
+		if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
+			return;
+
+		msr |= x86_amd_ls_cfg_ssbd_mask;
+
+		raw_spin_lock(&st->shared_state->lock);
+		/* First sibling enables SSBD: */
+		if (!st->shared_state->disable_state)
+			wrmsrl(MSR_AMD64_LS_CFG, msr);
+		st->shared_state->disable_state++;
+		raw_spin_unlock(&st->shared_state->lock);
+	} else {
+		if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
+			return;
+
+		raw_spin_lock(&st->shared_state->lock);
+		st->shared_state->disable_state--;
+		if (!st->shared_state->disable_state)
+			wrmsrl(MSR_AMD64_LS_CFG, msr);
+		raw_spin_unlock(&st->shared_state->lock);
+	}
+}
+#else
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+	u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+
+	wrmsrl(MSR_AMD64_LS_CFG, msr);
+}
+#endif
+
+static __always_inline void intel_set_ssb_state(unsigned long tifn)
+{
+	u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+
+	wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+}
+
 static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
 {
-	u64 msr;
-
-	if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
-		msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
-		wrmsrl(MSR_AMD64_LS_CFG, msr);
-	} else {
-		msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
-		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
-	}
+	if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+		amd_set_core_ssb_state(tifn);
+	else
+		intel_set_ssb_state(tifn);
 }
 
 void speculative_store_bypass_update(void)
 {
+	preempt_disable();
 	__speculative_store_bypass_update(current_thread_info()->flags);
+	preempt_enable();
 }
 
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 1f7aefc7b0b4..c017f1c71560 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -75,6 +75,7 @@
 #include <asm/i8259.h>
 #include <asm/realmode.h>
 #include <asm/misc.h>
+#include <asm/spec-ctrl.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -217,6 +218,8 @@ static void notrace start_secondary(void *unused)
 	 */
 	check_tsc_sync_target();
 
+	speculative_store_bypass_ht_init();
+
 	/*
 	 * Lock vector_lock and initialize the vectors on this cpu
 	 * before setting the cpu online. We must set it online with
@@ -1209,6 +1212,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_mtrr_aps_delayed_init();
 
 	smp_quirk_init_udelay();
+
+	speculative_store_bypass_ht_init();
 }
 
 void arch_enable_nonboot_cpus_begin(void)

From e13a6f0955bb5ee6daca1f08027d6561d0830daf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:39:06 -0700
Subject: [PATCH 089/519] x86/bugs, KVM: Extend speculation control for
 VIRT_SPEC_CTRL

commit ccbcd2674472a978b48c91c1fbfb66c0ff959f24 upstream

AMD is proposing a VIRT_SPEC_CTRL MSR to handle the Speculative Store
Bypass Disable via MSR_AMD64_LS_CFG so that guests do not have to care
about the bit position of the SSBD bit and thus facilitate migration.
Also, the sibling coordination on Family 17H CPUs can only be done on
the host.

Extend x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() with an
extra argument for the VIRT_SPEC_CTRL MSR.

Hand in 0 from VMX and in SVM add a new virt_spec_ctrl member to the CPU
data structure which is going to be used in later patches for the actual
implementation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[ Srivatsa: Backported to 4.4.y, skipping the KVM changes in this patch. ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/spec-ctrl.h |  9 ++++++---
 arch/x86/kernel/cpu/bugs.c       | 20 ++++++++++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 0cb49c4564b0..6e2874049afd 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -10,10 +10,13 @@
  * the guest has, while on VMEXIT we restore the host view. This
  * would be easier if SPEC_CTRL were architecturally maskable or
  * shadowable for guests but this is not (currently) the case.
- * Takes the guest view of SPEC_CTRL MSR as a parameter.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
+ * the guest's version of VIRT_SPEC_CTRL, if emulated.
  */
-extern void x86_spec_ctrl_set_guest(u64);
-extern void x86_spec_ctrl_restore_host(u64);
+extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl,
+				    u64 guest_virt_spec_ctrl);
+extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl,
+				       u64 guest_virt_spec_ctrl);
 
 /* AMD specific Speculative Store Bypass MSR data */
 extern u64 x86_amd_ls_cfg_base;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9be729267794..a1c98fda0878 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -149,7 +149,15 @@ u64 x86_spec_ctrl_get_default(void)
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
 
-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *				(may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
 {
 	u64 host = x86_spec_ctrl_base;
 
@@ -166,7 +174,15 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
 
-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *				(may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
 {
 	u64 host = x86_spec_ctrl_base;
 

From ecfe9bf30e4b7cd13f3b28f40a587a932b5cb457 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Sat, 14 Jul 2018 02:39:14 -0700
Subject: [PATCH 090/519] x86/speculation: Add virtualized speculative store
 bypass disable support

commit 11fb0683493b2da112cd64c9dada221b52463bf7 upstream

Some AMD processors only support a non-architectural means of enabling
speculative store bypass disable (SSBD).  To allow a simplified view of
this to a guest, an architectural definition has been created through a new
CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f.  With this, a
hypervisor can virtualize the existence of this definition and provide an
architectural method for using SSBD to a guest.

Add the new CPUID feature, the new MSR and update the existing SSBD
support to use this MSR when present.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  2 ++
 arch/x86/kernel/cpu/bugs.c         |  4 +++-
 arch/x86/kernel/process.c          | 13 ++++++++++++-
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8ae91326cc47..f4b175db70f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -269,6 +269,7 @@
 #define X86_FEATURE_AMD_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_AMD_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
 #define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD	(13*32+25) /* Virtualized Speculative Store Bypass Disable */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2ea2ff1a81e8..22f2dd50d2d9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -328,6 +328,8 @@
 #define MSR_AMD64_IBSOPDATA4		0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
 
+#define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
+
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL		0xc0010230
 #define MSR_F16H_L2I_PERF_CTR		0xc0010231
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a1c98fda0878..50ab206a09b3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -203,7 +203,9 @@ static void x86_amd_ssb_disable(void)
 {
 	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
 
-	if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+	if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+		wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+	else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0842869db312..eab9d0cfed70 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -308,6 +308,15 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
 }
 #endif
 
+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+{
+	/*
+	 * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
+	 * so ssbd_tif_to_spec_ctrl() just works.
+	 */
+	wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+}
+
 static __always_inline void intel_set_ssb_state(unsigned long tifn)
 {
 	u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
@@ -317,7 +326,9 @@ static __always_inline void intel_set_ssb_state(unsigned long tifn)
 
 static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
 {
-	if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+	if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+		amd_set_ssb_virt_state(tifn);
+	else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
 		amd_set_core_ssb_state(tifn);
 	else
 		intel_set_ssb_state(tifn);

From 3d60492cea89c0a0fb06c73ee49cc14c55f527dd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:39:22 -0700
Subject: [PATCH 091/519] x86/speculation: Rework
 speculative_store_bypass_update()

commit 0270be3e34efb05a88bc4c422572ece038ef3608 upstream

The upcoming support for the virtual SPEC_CTRL MSR on AMD needs to reuse
speculative_store_bypass_update() to avoid code duplication. Add an
argument for supplying a thread info (TIF) value and create a wrapper
speculative_store_bypass_update_current() which is used at the existing
call site.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/spec-ctrl.h | 7 ++++++-
 arch/x86/kernel/cpu/bugs.c       | 2 +-
 arch/x86/kernel/process.c        | 4 ++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 6e2874049afd..82b6c5a0d61e 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -42,6 +42,11 @@ extern void speculative_store_bypass_ht_init(void);
 static inline void speculative_store_bypass_ht_init(void) { }
 #endif
 
-extern void speculative_store_bypass_update(void);
+extern void speculative_store_bypass_update(unsigned long tif);
+
+static inline void speculative_store_bypass_update_current(void)
+{
+	speculative_store_bypass_update(current_thread_info()->flags);
+}
 
 #endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 50ab206a09b3..1b29be9211af 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -596,7 +596,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 	 * mitigation until it is next scheduled.
 	 */
 	if (task == current && update)
-		speculative_store_bypass_update();
+		speculative_store_bypass_update_current();
 
 	return 0;
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index eab9d0cfed70..e18c8798c3a2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -334,10 +334,10 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn
 		intel_set_ssb_state(tifn);
 }
 
-void speculative_store_bypass_update(void)
+void speculative_store_bypass_update(unsigned long tif)
 {
 	preempt_disable();
-	__speculative_store_bypass_update(current_thread_info()->flags);
+	__speculative_store_bypass_update(tif);
 	preempt_enable();
 }
 

From d5aec90670c378b6d05e5f904b1a8c8cffb17eef Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 14 Jul 2018 02:39:30 -0700
Subject: [PATCH 092/519] x86/bugs: Unify x86_spec_ctrl_{set_guest,
 restore_host}

commit cc69b34989210f067b2c51d5539b5f96ebcc3a01 upstream

Function bodies are very similar and are going to grow more almost
identical code. Add a bool arg to determine whether SPEC_CTRL is being set
for the guest or restored to the host.

No functional changes.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/spec-ctrl.h | 33 +++++++++++++++---
 arch/x86/kernel/cpu/bugs.c       | 58 ++++++++------------------------
 2 files changed, 43 insertions(+), 48 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 82b6c5a0d61e..9cecbe5e57ee 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -13,10 +13,35 @@
  * Takes the guest view of SPEC_CTRL MSR as a parameter and also
  * the guest's version of VIRT_SPEC_CTRL, if emulated.
  */
-extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl,
-				    u64 guest_virt_spec_ctrl);
-extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl,
-				       u64 guest_virt_spec_ctrl);
+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *				(may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+}
+
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ *				(may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+}
 
 /* AMD specific Speculative Store Bypass MSR data */
 extern u64 x86_amd_ls_cfg_base;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1b29be9211af..208d44c5cc50 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -149,55 +149,25 @@ u64 x86_spec_ctrl_get_default(void)
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
 
-/**
- * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
- * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
- * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
- *				(may get translated to MSR_AMD64_LS_CFG bits)
- *
- * Avoids writing to the MSR if the content/bits are the same
- */
-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void
+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
-	u64 host = x86_spec_ctrl_base;
+	struct thread_info *ti = current_thread_info();
+	u64 msr, host = x86_spec_ctrl_base;
 
 	/* Is MSR_SPEC_CTRL implemented ? */
-	if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
-		return;
+	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+		/* SSBD controlled in MSR_SPEC_CTRL */
+		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+			host |= ssbd_tif_to_spec_ctrl(ti->flags);
 
-	/* SSBD controlled in MSR_SPEC_CTRL */
-	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
-		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
-
-	if (host != guest_spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
+		if (host != guest_spec_ctrl) {
+			msr = setguest ? guest_spec_ctrl : host;
+			wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+		}
+	}
 }
-EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
-
-/**
- * x86_spec_ctrl_restore_host - Restore host speculation control registers
- * @guest_spec_ctrl:		The guest content of MSR_SPEC_CTRL
- * @guest_virt_spec_ctrl:	The guest controlled bits of MSR_VIRT_SPEC_CTRL
- *				(may get translated to MSR_AMD64_LS_CFG bits)
- *
- * Avoids writing to the MSR if the content/bits are the same
- */
-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
-{
-	u64 host = x86_spec_ctrl_base;
-
-	/* Is MSR_SPEC_CTRL implemented ? */
-	if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
-		return;
-
-	/* SSBD controlled in MSR_SPEC_CTRL */
-	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
-		host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
-
-	if (host != guest_spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, host);
-}
-EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
 
 static void x86_amd_ssb_disable(void)
 {

From 9ed7ee52e4e06364f47d6a6e898610bae5f04e93 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:39:38 -0700
Subject: [PATCH 093/519] x86/bugs: Expose x86_spec_ctrl_base directly

commit fa8ac4988249c38476f6ad678a4848a736373403 upstream

x86_spec_ctrl_base is the system wide default value for the SPEC_CTRL MSR.
x86_spec_ctrl_get_default() returns x86_spec_ctrl_base and was intended to
prevent modification to that variable. Though the variable is read only
after init and globaly visible already.

Remove the function and export the variable instead.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h | 16 +++++-----------
 arch/x86/include/asm/spec-ctrl.h     |  3 ---
 arch/x86/kernel/cpu/bugs.c           | 11 +----------
 3 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 640c11b25913..2757c79754e1 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -172,16 +172,7 @@ enum spectre_v2_mitigation {
 	SPECTRE_V2_IBRS,
 };
 
-/*
- * The Intel specification for the SPEC_CTRL MSR requires that we
- * preserve any already set reserved bits at boot time (e.g. for
- * future additions that this kernel is not currently aware of).
- * We then set any additional mitigation bits that we want
- * ourselves and always use this as the base for SPEC_CTRL.
- * We also use this when handling guest entry/exit as below.
- */
 extern void x86_spec_ctrl_set(u64);
-extern u64 x86_spec_ctrl_get_default(void);
 
 /* The Speculative Store Bypass disable variants */
 enum ssb_mitigation {
@@ -232,6 +223,9 @@ static inline void indirect_branch_prediction_barrier(void)
 	alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
 }
 
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
  * before calling into firmware.
@@ -240,7 +234,7 @@ static inline void indirect_branch_prediction_barrier(void)
  */
 #define firmware_restrict_branch_speculation_start()			\
 do {									\
-	u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS;		\
+	u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;			\
 									\
 	preempt_disable();						\
 	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
@@ -249,7 +243,7 @@ do {									\
 
 #define firmware_restrict_branch_speculation_end()			\
 do {									\
-	u64 val = x86_spec_ctrl_get_default();				\
+	u64 val = x86_spec_ctrl_base;					\
 									\
 	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
 			      X86_FEATURE_USE_IBRS_FW);			\
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 9cecbe5e57ee..763d49710329 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -47,9 +47,6 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
 extern u64 x86_amd_ls_cfg_base;
 extern u64 x86_amd_ls_cfg_ssbd_mask;
 
-/* The Intel SPEC CTRL MSR base value cache */
-extern u64 x86_spec_ctrl_base;
-
 static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
 {
 	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 208d44c5cc50..5391df5826c9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -35,6 +35,7 @@ static void __init ssb_select_mitigation(void);
  * writes to SPEC_CTRL contain whatever reserved bits have been set.
  */
 u64 x86_spec_ctrl_base;
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
 
 /*
  * The vendor and possibly platform specific bits which can be modified in
@@ -139,16 +140,6 @@ void x86_spec_ctrl_set(u64 val)
 }
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_set);
 
-u64 x86_spec_ctrl_get_default(void)
-{
-	u64 msrval = x86_spec_ctrl_base;
-
-	if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
-		msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
-	return msrval;
-}
-EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
-
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {

From 90cfa767bc12a9931e5e45ed275b069d5b35b52e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:39:46 -0700
Subject: [PATCH 094/519] x86/bugs: Remove x86_spec_ctrl_set()

commit 4b59bdb569453a60b752b274ca61f009e37f4dae upstream

x86_spec_ctrl_set() is only used in bugs.c and the extra mask checks there
provide no real value as both call sites can just write x86_spec_ctrl_base
to MSR_SPEC_CTRL. x86_spec_ctrl_base is valid and does not need any extra
masking or checking.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/nospec-branch.h |  2 --
 arch/x86/kernel/cpu/bugs.c           | 13 ++-----------
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2757c79754e1..b4c74c24c890 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -172,8 +172,6 @@ enum spectre_v2_mitigation {
 	SPECTRE_V2_IBRS,
 };
 
-extern void x86_spec_ctrl_set(u64);
-
 /* The Speculative Store Bypass disable variants */
 enum ssb_mitigation {
 	SPEC_STORE_BYPASS_NONE,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5391df5826c9..05eed68a32f4 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -131,15 +131,6 @@ static const char *spectre_v2_strings[] = {
 
 static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
 
-void x86_spec_ctrl_set(u64 val)
-{
-	if (val & x86_spec_ctrl_mask)
-		WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
-	else
-		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
-}
-EXPORT_SYMBOL_GPL(x86_spec_ctrl_set);
-
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
@@ -501,7 +492,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 		case X86_VENDOR_INTEL:
 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
 			x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD;
-			x86_spec_ctrl_set(SPEC_CTRL_SSBD);
+			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 			break;
 		case X86_VENDOR_AMD:
 			x86_amd_ssb_disable();
@@ -613,7 +604,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 void x86_spec_ctrl_setup_ap(void)
 {
 	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
-		x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
+		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 
 	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
 		x86_amd_ssb_disable();

From 80d7439fb0c446d006599b6347efd255a86a93ca Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:39:55 -0700
Subject: [PATCH 095/519] x86/bugs: Rework spec_ctrl base and mask logic

commit be6fcb5478e95bb1c91f489121238deb3abca46a upstream

x86_spec_ctrL_mask is intended to mask out bits from a MSR_SPEC_CTRL value
which are not to be modified. However the implementation is not really used
and the bitmask was inverted to make a check easier, which was removed in
"x86/bugs: Remove x86_spec_ctrl_set()"

Aside of that it is missing the STIBP bit if it is supported by the
platform, so if the mask would be used in x86_virt_spec_ctrl() then it
would prevent a guest from setting STIBP.

Add the STIBP bit if supported and use the mask in x86_virt_spec_ctrl() to
sanitize the value which is supplied by the guest.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 05eed68a32f4..af11a02819bc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
  * The vendor and possibly platform specific bits which can be modified in
  * x86_spec_ctrl_base.
  */
-static u64 x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS;
+static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
 
 /*
  * AMD specific MSR info for Speculative Store Bypass control.
@@ -67,6 +67,10 @@ void __init check_bugs(void)
 	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
 		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 
+	/* Allow STIBP in MSR_SPEC_CTRL if supported */
+	if (boot_cpu_has(X86_FEATURE_STIBP))
+		x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
 	/* Select the proper spectre mitigation before patching alternatives */
 	spectre_v2_select_mitigation();
 
@@ -134,18 +138,26 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
+	u64 msrval, guestval, hostval = x86_spec_ctrl_base;
 	struct thread_info *ti = current_thread_info();
-	u64 msr, host = x86_spec_ctrl_base;
 
 	/* Is MSR_SPEC_CTRL implemented ? */
 	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+		/*
+		 * Restrict guest_spec_ctrl to supported values. Clear the
+		 * modifiable bits in the host base value and or the
+		 * modifiable bits from the guest value.
+		 */
+		guestval = hostval & ~x86_spec_ctrl_mask;
+		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
 		/* SSBD controlled in MSR_SPEC_CTRL */
 		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
-			host |= ssbd_tif_to_spec_ctrl(ti->flags);
+			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
 
-		if (host != guest_spec_ctrl) {
-			msr = setguest ? guest_spec_ctrl : host;
-			wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+		if (hostval != guestval) {
+			msrval = setguest ? guestval : hostval;
+			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
 		}
 	}
 }
@@ -491,7 +503,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 		switch (boot_cpu_data.x86_vendor) {
 		case X86_VENDOR_INTEL:
 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
-			x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD;
+			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 			break;
 		case X86_VENDOR_AMD:

From 48805280d05c968e0883e8debf5e33f40f8e56c5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Jul 2018 02:40:03 -0700
Subject: [PATCH 096/519] x86/speculation, KVM: Implement support for
 VIRT_SPEC_CTRL/LS_CFG

commit 47c61b3955cf712cadfc25635bf9bc174af030ea upstream

Add the necessary logic for supporting the emulated VIRT_SPEC_CTRL MSR to
x86_virt_spec_ctrl().  If either X86_FEATURE_LS_CFG_SSBD or
X86_FEATURE_VIRT_SPEC_CTRL is set then use the new guest_virt_spec_ctrl
argument to check whether the state must be modified on the host. The
update reuses speculative_store_bypass_update() so the ZEN-specific sibling
coordination can be reused.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/spec-ctrl.h |  6 ++++++
 arch/x86/kernel/cpu/bugs.c       | 30 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 763d49710329..ae7c2c5cd7f0 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -53,6 +53,12 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
 	return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
 }
 
+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+	return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
 static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
 {
 	return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index af11a02819bc..12a8867071f3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -160,6 +160,36 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
 		}
 	}
+
+	/*
+	 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+	 * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
+	 */
+	if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+	    !static_cpu_has(X86_FEATURE_VIRT_SSBD))
+		return;
+
+	/*
+	 * If the host has SSBD mitigation enabled, force it in the host's
+	 * virtual MSR value. If its not permanently enabled, evaluate
+	 * current's TIF_SSBD thread flag.
+	 */
+	if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
+		hostval = SPEC_CTRL_SSBD;
+	else
+		hostval = ssbd_tif_to_spec_ctrl(ti->flags);
+
+	/* Sanitize the guest value */
+	guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
+
+	if (hostval != guestval) {
+		unsigned long tif;
+
+		tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+				 ssbd_spec_ctrl_to_tif(hostval);
+
+		speculative_store_bypass_update(tif);
+	}
 }
 EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
 

From cadb98135daf474648d646db5625e9c663b94a3d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Sat, 14 Jul 2018 02:40:10 -0700
Subject: [PATCH 097/519] x86/bugs: Rename SSBD_NO to SSB_NO

commit 240da953fcc6a9008c92fae5b1f727ee5ed167ab upstream

The "336996 Speculative Execution Side Channel Mitigations" from
May defines this as SSB_NO, hence lets sync-up.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h | 2 +-
 arch/x86/kernel/cpu/common.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 22f2dd50d2d9..caa00191e565 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -58,7 +58,7 @@
 #define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
 #define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
 #define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
-#define ARCH_CAP_SSBD_NO		(1 << 4)   /*
+#define ARCH_CAP_SSB_NO			(1 << 4)   /*
 						    * Not susceptible to Speculative Store Bypass
 						    * attack, so no Speculative Store Bypass
 						    * control required.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 10977236c6eb..9ad38ad194ac 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -881,7 +881,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
 
 	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
-	   !(ia32_cap & ARCH_CAP_SSBD_NO))
+	   !(ia32_cap & ARCH_CAP_SSB_NO))
 		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
 	if (x86_match_cpu(cpu_no_speculation))

From 399a9d0cc466b0100feee5c6e9de7e6378b18fab Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 21 Jun 2018 10:43:31 +0200
Subject: [PATCH 098/519] x86/xen: Add call of
 speculative_store_bypass_ht_init() to PV paths

commit 74899d92e66663dc7671a8017b3146dcd4735f3b upstream.

Commit:

  1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD")

... added speculative_store_bypass_ht_init() to the per-CPU initialization sequence.

speculative_store_bypass_ht_init() needs to be called on each CPU for
PV guests, too.

Reported-by: Brian Woods <brian.woods@amd.com>
Tested-by: Brian Woods <brian.woods@amd.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: <stable@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: xen-devel@lists.xenproject.org
Fixes: 1f50ddb4f4189243c05926b842dc1a0332195f31 ("x86/speculation: Handle HT correctly on AMD")
Link: https://lore.kernel.org/lkml/20180621084331.21228-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/smp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0261f2..29e50d1229bc 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -28,6 +28,7 @@
 #include <xen/interface/vcpu.h>
 #include <xen/interface/xenpmu.h>
 
+#include <asm/spec-ctrl.h>
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
 
@@ -87,6 +88,8 @@ static void cpu_bringup(void)
 	cpu_data(cpu).x86_max_cores = 1;
 	set_cpu_sibling_map(cpu);
 
+	speculative_store_bypass_ht_init();
+
 	xen_setup_cpu_clockevents();
 
 	notify_cpu_starting(cpu);
@@ -357,6 +360,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
+	speculative_store_bypass_ht_init();
+
 	xen_pmu_init(0);
 
 	if (xen_smp_intr_init(0))

From 42a8fe474e0c3e9babad09b4d3e882d7a0f09c76 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 18 Jan 2017 11:15:39 -0800
Subject: [PATCH 099/519] x86/cpu: Re-apply forced caps every time CPU caps are
 re-read

commit 60d3450167433f2d099ce2869dc52dd9e7dc9b29 upstream.

Calling get_cpu_cap() will reset a bunch of CPU features.  This will
cause the system to lose track of force-set and force-cleared
features in the words that are reset until the end of CPU
initialization.  This can cause X86_FEATURE_FPU, for example, to
change back and forth during boot and potentially confuse CPU setup.

To minimize the chance of confusion, re-apply forced caps every time
get_cpu_cap() is called.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Whitehead <tedheadster@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/c817eb373d2c67c2c81413a70fc9b845fa34a37e.1484705016.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9ad38ad194ac..3d21b28f9826 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -803,6 +803,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
 	init_scattered_cpuid_features(c);
 	init_speculation_control(c);
+
+	/*
+	 * Clear/Set all flags overridden by options, after probe.
+	 * This needs to happen each time we re-probe, which may happen
+	 * several times during CPU initialization.
+	 */
+	apply_forced_caps(c);
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)

From c9ae49135d83243f12b5a66044302d4a17e0dcfe Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan.christopher.jenkins@gmail.com>
Date: Thu, 12 Apr 2018 19:11:58 +0100
Subject: [PATCH 100/519] block: do not use interruptible wait anywhere

commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream.

When blk_queue_enter() waits for a queue to unfreeze, or unset the
PREEMPT_ONLY flag, do not allow it to be interrupted by a signal.

The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec
("block, scsi: Make SCSI quiesce and resume work reliably").  Note the SCSI
device is resumed asynchronously, i.e. after un-freezing userspace tasks.

So that commit exposed the bug as a regression in v4.15.  A mysterious
SIGBUS (or -EIO) sometimes happened during the time the device was being
resumed.  Most frequently, there was no kernel log message, and we saw Xorg
or Xwayland killed by SIGBUS.[1]

[1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979

Without this fix, I get an IO error in this test:

# dd if=/dev/sda of=/dev/null iflag=direct & \
  while killall -SIGUSR1 dd; do sleep 0.1; done & \
  echo mem > /sys/power/state ; \
  sleep 5; killall dd  # stop after 5 seconds

The interruptible wait was added to blk_queue_enter in
commit 3ef28e83ab15 ("block: generic request_queue reference counting").
Before then, the interruptible wait was only in blk-mq, but I don't think
it could ever have been correct.

Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alan Jenkins <alan.christopher.jenkins@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-core.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f5f1a55703ae..50d77c90070d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -651,21 +651,17 @@ EXPORT_SYMBOL(blk_alloc_queue);
 int blk_queue_enter(struct request_queue *q, gfp_t gfp)
 {
 	while (true) {
-		int ret;
-
 		if (percpu_ref_tryget_live(&q->q_usage_counter))
 			return 0;
 
 		if (!gfpflags_allow_blocking(gfp))
 			return -EBUSY;
 
-		ret = wait_event_interruptible(q->mq_freeze_wq,
-				!atomic_read(&q->mq_freeze_depth) ||
-				blk_queue_dying(q));
+		wait_event(q->mq_freeze_wq,
+			   !atomic_read(&q->mq_freeze_depth) ||
+			   blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
-		if (ret)
-			return ret;
 	}
 }
 

From 470ee7ab7776085fe5573788df2dea8140d7a0c1 Mon Sep 17 00:00:00 2001
From: Lucas Stach <dev@lynxeye.de>
Date: Mon, 29 Feb 2016 21:46:07 +0100
Subject: [PATCH 101/519] clk: tegra: Fix PLL_U post divider and initial rate
 on Tegra30

commit 797097301860c64b63346d068ba4fe4992bd5021 upstream.

The post divider value in the frequency table is wrong as it would lead
to the PLL producing an output rate of 960 MHz instead of the desired
480 MHz. This wasn't a problem as nothing used the table to actually
initialize the PLL rate, but the bootloader configuration was used
unaltered.

If the bootloader does not set up the PLL it will fail to come when used
under Linux. To fix this don't rely on the bootloader, but set the
correct rate in the clock driver.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Signed-off-by: Thierry Reding <treding@nvidia.com>
[jonathanh@nvidia.com: Back-ported to stable v4.4.y]
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/tegra/clk-tegra30.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 8c41c6fcb9ee..acf83569f86f 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -333,11 +333,11 @@ static struct pdiv_map pllu_p[] = {
 };
 
 static struct tegra_clk_pll_freq_table pll_u_freq_table[] = {
-	{ 12000000, 480000000, 960, 12, 0, 12},
-	{ 13000000, 480000000, 960, 13, 0, 12},
-	{ 16800000, 480000000, 400, 7,  0, 5},
-	{ 19200000, 480000000, 200, 4,  0, 3},
-	{ 26000000, 480000000, 960, 26, 0, 12},
+	{ 12000000, 480000000, 960, 12, 2, 12 },
+	{ 13000000, 480000000, 960, 13, 2, 12 },
+	{ 16800000, 480000000, 400,  7, 2,  5 },
+	{ 19200000, 480000000, 200,  4, 2,  3 },
+	{ 26000000, 480000000, 960, 26, 2, 12 },
 	{ 0, 0, 0, 0, 0, 0 },
 };
 
@@ -1372,6 +1372,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{TEGRA30_CLK_GR2D, TEGRA30_CLK_PLL_C, 300000000, 0},
 	{TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 300000000, 0},
 	{TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 300000000, 0},
+	{ TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 480000000, 0 },
 	{TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0}, /* This MUST be the last entry. */
 };
 

From 1ee52929e64c4bd185884ebc22d437ff93f97e3a Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Tue, 14 Jun 2016 10:12:13 +0200
Subject: [PATCH 102/519] ubi: Introduce vol_ignored()

commit 243a4f8126fcf7facb04b324dbb7c85d10b11ce9 upstream.

This makes the logic more easy to follow.

Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/attach.c | 24 ++++++++++++++++++------
 drivers/mtd/ubi/ubi.h    | 15 +++++++++++++++
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index c1aaf0336cf2..abd6137a3b4d 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -803,6 +803,20 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
 	return err;
 }
 
+static bool vol_ignored(int vol_id)
+{
+	switch (vol_id) {
+		case UBI_LAYOUT_VOLUME_ID:
+		return true;
+	}
+
+#ifdef CONFIG_MTD_UBI_FASTMAP
+	return ubi_is_fm_vol(vol_id);
+#else
+	return false;
+#endif
+}
+
 /**
  * scan_peb - scan and process UBI headers of a PEB.
  * @ubi: UBI device description object
@@ -995,17 +1009,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
 		*vid = vol_id;
 	if (sqnum)
 		*sqnum = be64_to_cpu(vidh->sqnum);
-	if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) {
+	if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) {
 		int lnum = be32_to_cpu(vidh->lnum);
 
 		/* Unsupported internal volume */
 		switch (vidh->compat) {
 		case UBI_COMPAT_DELETE:
-			if (vol_id != UBI_FM_SB_VOLUME_ID
-			    && vol_id != UBI_FM_DATA_VOLUME_ID) {
-				ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
-					vol_id, lnum);
-			}
+			ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
+				vol_id, lnum);
+
 			err = add_to_list(ai, pnum, vol_id, lnum,
 					  ec, 1, &ai->erase);
 			if (err)
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index de1ea2e4c37d..086ff56922b5 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -1101,4 +1101,19 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
 		return idx;
 }
 
+/**
+ * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume.
+ * @vol_id: volume ID
+ */
+static inline bool ubi_is_fm_vol(int vol_id)
+{
+	switch (vol_id) {
+		case UBI_FM_SB_VOLUME_ID:
+		case UBI_FM_DATA_VOLUME_ID:
+		return true;
+	}
+
+	return false;
+}
+
 #endif /* !__UBI_UBI_H__ */

From 6fdca47fcc1a26b770ce1eb1a440ea06f8d804c5 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Tue, 14 Jun 2016 10:12:15 +0200
Subject: [PATCH 103/519] ubi: Rework Fastmap attach base code

commit fdf10ed710c0aa177e8dfcd84e65e4e5e8e0956b upstream.

Introduce a new list to the UBI attach information
object to be able to deal better with old and corrupted
Fastmap eraseblocks.
Also move more Fastmap specific code into fastmap.c.

Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/attach.c  | 99 +++++++++++++++++++++++++++------------
 drivers/mtd/ubi/fastmap.c | 36 ++++++++++++--
 drivers/mtd/ubi/ubi.h     | 28 ++++++++++-
 drivers/mtd/ubi/wl.c      | 47 ++++++++++++++-----
 4 files changed, 165 insertions(+), 45 deletions(-)

diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index abd6137a3b4d..68ff57435c89 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -174,6 +174,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec)
 	return 0;
 }
 
+/**
+ * add_fastmap - add a Fastmap related physical eraseblock.
+ * @ai: attaching information
+ * @pnum: physical eraseblock number the VID header came from
+ * @vid_hdr: the volume identifier header
+ * @ec: erase counter of the physical eraseblock
+ *
+ * This function allocates a 'struct ubi_ainf_peb' object for a Fastamp
+ * physical eraseblock @pnum and adds it to the 'fastmap' list.
+ * Such blocks can be Fastmap super and data blocks from both the most
+ * recent Fastmap we're attaching from or from old Fastmaps which will
+ * be erased.
+ */
+static int add_fastmap(struct ubi_attach_info *ai, int pnum,
+		       struct ubi_vid_hdr *vid_hdr, int ec)
+{
+	struct ubi_ainf_peb *aeb;
+
+	aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL);
+	if (!aeb)
+		return -ENOMEM;
+
+	aeb->pnum = pnum;
+	aeb->vol_id = be32_to_cpu(vidh->vol_id);
+	aeb->sqnum = be64_to_cpu(vidh->sqnum);
+	aeb->ec = ec;
+	list_add(&aeb->u.list, &ai->fastmap);
+
+	dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum,
+		aeb->vol_id, aeb->sqnum);
+
+	return 0;
+}
+
 /**
  * validate_vid_hdr - check volume identifier header.
  * @ubi: UBI device description object
@@ -822,18 +856,15 @@ static bool vol_ignored(int vol_id)
  * @ubi: UBI device description object
  * @ai: attaching information
  * @pnum: the physical eraseblock number
- * @vid: The volume ID of the found volume will be stored in this pointer
- * @sqnum: The sqnum of the found volume will be stored in this pointer
  *
  * This function reads UBI headers of PEB @pnum, checks them, and adds
  * information about this PEB to the corresponding list or RB-tree in the
  * "attaching info" structure. Returns zero if the physical eraseblock was
  * successfully handled and a negative error code in case of failure.
  */
-static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
-		    int pnum, int *vid, unsigned long long *sqnum)
+static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum)
 {
-	long long uninitialized_var(ec);
+	long long ec;
 	int err, bitflips = 0, vol_id = -1, ec_err = 0;
 
 	dbg_bld("scan PEB %d", pnum);
@@ -1005,10 +1036,6 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
 	}
 
 	vol_id = be32_to_cpu(vidh->vol_id);
-	if (vid)
-		*vid = vol_id;
-	if (sqnum)
-		*sqnum = be64_to_cpu(vidh->sqnum);
 	if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) {
 		int lnum = be32_to_cpu(vidh->lnum);
 
@@ -1049,7 +1076,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
 	if (ec_err)
 		ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d",
 			 pnum);
-	err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
+
+	if (ubi_is_fm_vol(vol_id))
+		err = add_fastmap(ai, pnum, vidh, ec);
+	else
+		err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
+
 	if (err)
 		return err;
 
@@ -1198,6 +1230,10 @@ static void destroy_ai(struct ubi_attach_info *ai)
 		list_del(&aeb->u.list);
 		kmem_cache_free(ai->aeb_slab_cache, aeb);
 	}
+	list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) {
+		list_del(&aeb->u.list);
+		kmem_cache_free(ai->aeb_slab_cache, aeb);
+	}
 
 	/* Destroy the volume RB-tree */
 	rb = ai->volumes.rb_node;
@@ -1257,7 +1293,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai,
 		cond_resched();
 
 		dbg_gen("process PEB %d", pnum);
-		err = scan_peb(ubi, ai, pnum, NULL, NULL);
+		err = scan_peb(ubi, ai, pnum);
 		if (err < 0)
 			goto out_vidh;
 	}
@@ -1323,6 +1359,7 @@ static struct ubi_attach_info *alloc_ai(void)
 	INIT_LIST_HEAD(&ai->free);
 	INIT_LIST_HEAD(&ai->erase);
 	INIT_LIST_HEAD(&ai->alien);
+	INIT_LIST_HEAD(&ai->fastmap);
 	ai->volumes = RB_ROOT;
 	ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache",
 					       sizeof(struct ubi_ainf_peb),
@@ -1349,52 +1386,54 @@ static struct ubi_attach_info *alloc_ai(void)
  */
 static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
 {
-	int err, pnum, fm_anchor = -1;
-	unsigned long long max_sqnum = 0;
+	int err, pnum;
+	struct ubi_attach_info *scan_ai;
 
 	err = -ENOMEM;
 
+	scan_ai = alloc_ai();
+	if (!scan_ai)
+		goto out;
+
 	ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
 	if (!ech)
-		goto out;
+		goto out_ai;
 
 	vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
 	if (!vidh)
 		goto out_ech;
 
 	for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) {
-		int vol_id = -1;
-		unsigned long long sqnum = -1;
 		cond_resched();
 
 		dbg_gen("process PEB %d", pnum);
-		err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum);
+		err = scan_peb(ubi, scan_ai, pnum);
 		if (err < 0)
 			goto out_vidh;
-
-		if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) {
-			max_sqnum = sqnum;
-			fm_anchor = pnum;
-		}
 	}
 
 	ubi_free_vid_hdr(ubi, vidh);
 	kfree(ech);
 
-	if (fm_anchor < 0)
-		return UBI_NO_FASTMAP;
+	err = ubi_scan_fastmap(ubi, *ai, scan_ai);
+	if (err) {
+		/*
+		 * Didn't attach via fastmap, do a full scan but reuse what
+		 * we've aready scanned.
+		 */
+		destroy_ai(*ai);
+		*ai = scan_ai;
+	} else
+		destroy_ai(scan_ai);
 
-	destroy_ai(*ai);
-	*ai = alloc_ai();
-	if (!*ai)
-		return -ENOMEM;
-
-	return ubi_scan_fastmap(ubi, *ai, fm_anchor);
+	return err;
 
 out_vidh:
 	ubi_free_vid_hdr(ubi, vidh);
 out_ech:
 	kfree(ech);
+out_ai:
+	destroy_ai(scan_ai);
 out:
 	return err;
 }
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index bba7dd1b5ebf..e724a363cef3 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -850,28 +850,58 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
 	return ret;
 }
 
+/**
+ * find_fm_anchor - find the most recent Fastmap superblock (anchor)
+ * @ai: UBI attach info to be filled
+ */
+static int find_fm_anchor(struct ubi_attach_info *ai)
+{
+	int ret = -1;
+	struct ubi_ainf_peb *aeb;
+	unsigned long long max_sqnum = 0;
+
+	list_for_each_entry(aeb, &ai->fastmap, u.list) {
+		if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) {
+			max_sqnum = aeb->sqnum;
+			ret = aeb->pnum;
+		}
+	}
+
+	return ret;
+}
+
 /**
  * ubi_scan_fastmap - scan the fastmap.
  * @ubi: UBI device object
  * @ai: UBI attach info to be filled
- * @fm_anchor: The fastmap starts at this PEB
+ * @scan_ai: UBI attach info from the first 64 PEBs,
+ *           used to find the most recent Fastmap data structure
  *
  * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found,
  * UBI_BAD_FASTMAP if one was found but is not usable.
  * < 0 indicates an internal error.
  */
 int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
-		     int fm_anchor)
+		     struct ubi_attach_info *scan_ai)
 {
 	struct ubi_fm_sb *fmsb, *fmsb2;
 	struct ubi_vid_hdr *vh;
 	struct ubi_ec_hdr *ech;
 	struct ubi_fastmap_layout *fm;
-	int i, used_blocks, pnum, ret = 0;
+	struct ubi_ainf_peb *tmp_aeb, *aeb;
+	int i, used_blocks, pnum, fm_anchor, ret = 0;
 	size_t fm_size;
 	__be32 crc, tmp_crc;
 	unsigned long long sqnum = 0;
 
+	fm_anchor = find_fm_anchor(scan_ai);
+	if (fm_anchor < 0)
+		return UBI_NO_FASTMAP;
+
+	/* Move all (possible) fastmap blocks into our new attach structure. */
+	list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list)
+		list_move_tail(&aeb->u.list, &ai->fastmap);
+
 	down_write(&ubi->fm_protect);
 	memset(ubi->fm_buf, 0, ubi->fm_size);
 
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 086ff56922b5..051976caf8f0 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -699,6 +699,8 @@ struct ubi_ainf_volume {
  * @erase: list of physical eraseblocks which have to be erased
  * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
  *         those belonging to "preserve"-compatible internal volumes)
+ * @fastmap: list of physical eraseblocks which relate to fastmap (e.g.,
+ *           eraseblocks of the current and not yet erased old fastmap blocks)
  * @corr_peb_count: count of PEBs in the @corr list
  * @empty_peb_count: count of PEBs which are presumably empty (contain only
  *                   0xFF bytes)
@@ -727,6 +729,7 @@ struct ubi_attach_info {
 	struct list_head free;
 	struct list_head erase;
 	struct list_head alien;
+	struct list_head fastmap;
 	int corr_peb_count;
 	int empty_peb_count;
 	int alien_peb_count;
@@ -907,7 +910,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
 size_t ubi_calc_fm_size(struct ubi_device *ubi);
 int ubi_update_fastmap(struct ubi_device *ubi);
 int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
-		     int fm_anchor);
+		     struct ubi_attach_info *scan_ai);
 #else
 static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; }
 #endif
@@ -1116,4 +1119,27 @@ static inline bool ubi_is_fm_vol(int vol_id)
 	return false;
 }
 
+/**
+ * ubi_find_fm_block - check whether a PEB is part of the current Fastmap.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to look for
+ *
+ * This function returns a wear leveling object if @pnum relates to the current
+ * fastmap, @NULL otherwise.
+ */
+static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi,
+						     int pnum)
+{
+	int i;
+
+	if (ubi->fm) {
+		for (i = 0; i < ubi->fm->used_blocks; i++) {
+			if (ubi->fm->e[i]->pnum == pnum)
+				return ubi->fm->e[i];
+		}
+	}
+
+	return NULL;
+}
+
 #endif /* !__UBI_UBI_H__ */
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index ca9746f41ff1..7eb1f3fb16e2 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1598,19 +1598,44 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 		}
 	}
 
+	list_for_each_entry(aeb, &ai->fastmap, u.list) {
+		cond_resched();
+
+		e = ubi_find_fm_block(ubi, aeb->pnum);
+
+		if (e) {
+			ubi_assert(!ubi->lookuptbl[e->pnum]);
+			ubi->lookuptbl[e->pnum] = e;
+		} else {
+			/*
+			 * Usually old Fastmap PEBs are scheduled for erasure
+			 * and we don't have to care about them but if we face
+			 * an power cut before scheduling them we need to
+			 * take care of them here.
+			 */
+			if (ubi->lookuptbl[aeb->pnum])
+				continue;
+
+			e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+			if (!e)
+				goto out_free;
+
+			e->pnum = aeb->pnum;
+			e->ec = aeb->ec;
+			ubi_assert(!ubi->lookuptbl[e->pnum]);
+			ubi->lookuptbl[e->pnum] = e;
+			if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
+				wl_entry_destroy(ubi, e);
+				goto out_free;
+			}
+		}
+
+		found_pebs++;
+	}
+
 	dbg_wl("found %i PEBs", found_pebs);
 
-	if (ubi->fm) {
-		ubi_assert(ubi->good_peb_count ==
-			   found_pebs + ubi->fm->used_blocks);
-
-		for (i = 0; i < ubi->fm->used_blocks; i++) {
-			e = ubi->fm->e[i];
-			ubi->lookuptbl[e->pnum] = e;
-		}
-	}
-	else
-		ubi_assert(ubi->good_peb_count == found_pebs);
+	ubi_assert(ubi->good_peb_count == found_pebs);
 
 	reserved_pebs = WL_RESERVED_PEBS;
 	ubi_fastmap_init(ubi, &reserved_pebs);

From faf2b8d929a47809eab04f17e21f44ebae377dc6 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Tue, 14 Jun 2016 10:12:17 +0200
Subject: [PATCH 104/519] ubi: Be more paranoid while seaching for the most
 recent Fastmap

commit 74f2c6e9a47cf4e508198c8594626cc82906a13d upstream.

Since PEB erasure is asynchornous it can happen that there is
more than one Fastmap on the MTD. This is fine because the attach logic
will pick the Fastmap data structure with the highest sequence number.

On a not so well configured MTD stack spurious ECC errors are common.
Causes can be different, bad hardware, wrong operating modes, etc...
If the most current Fastmap renders bad due to ECC errors UBI might
pick an older Fastmap to attach from.
While this can only happen on an anyway broken setup it will show
completely different sympthoms and makes finding the root cause much
more difficult.
So, be debug friendly and fall back to scanning mode of we're facing
an ECC error while scanning for Fastmap.

Cc: <stable@vger.kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/attach.c | 28 ++++++++++++++++++++++++----
 drivers/mtd/ubi/ubi.h    |  3 +++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index 68ff57435c89..5cde3ad1665e 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -856,13 +856,15 @@ static bool vol_ignored(int vol_id)
  * @ubi: UBI device description object
  * @ai: attaching information
  * @pnum: the physical eraseblock number
+ * @fast: true if we're scanning for a Fastmap
  *
  * This function reads UBI headers of PEB @pnum, checks them, and adds
  * information about this PEB to the corresponding list or RB-tree in the
  * "attaching info" structure. Returns zero if the physical eraseblock was
  * successfully handled and a negative error code in case of failure.
  */
-static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum)
+static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
+		    int pnum, bool fast)
 {
 	long long ec;
 	int err, bitflips = 0, vol_id = -1, ec_err = 0;
@@ -980,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum
 			 */
 			ai->maybe_bad_peb_count += 1;
 	case UBI_IO_BAD_HDR:
+			/*
+			 * If we're facing a bad VID header we have to drop *all*
+			 * Fastmap data structures we find. The most recent Fastmap
+			 * could be bad and therefore there is a chance that we attach
+			 * from an old one. On a fine MTD stack a PEB must not render
+			 * bad all of a sudden, but the reality is different.
+			 * So, let's be paranoid and help finding the root cause by
+			 * falling back to scanning mode instead of attaching with a
+			 * bad EBA table and cause data corruption which is hard to
+			 * analyze.
+			 */
+			if (fast)
+				ai->force_full_scan = 1;
+
 		if (ec_err)
 			/*
 			 * Both headers are corrupted. There is a possibility
@@ -1293,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai,
 		cond_resched();
 
 		dbg_gen("process PEB %d", pnum);
-		err = scan_peb(ubi, ai, pnum);
+		err = scan_peb(ubi, ai, pnum, false);
 		if (err < 0)
 			goto out_vidh;
 	}
@@ -1407,7 +1423,7 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
 		cond_resched();
 
 		dbg_gen("process PEB %d", pnum);
-		err = scan_peb(ubi, scan_ai, pnum);
+		err = scan_peb(ubi, scan_ai, pnum, true);
 		if (err < 0)
 			goto out_vidh;
 	}
@@ -1415,7 +1431,11 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
 	ubi_free_vid_hdr(ubi, vidh);
 	kfree(ech);
 
-	err = ubi_scan_fastmap(ubi, *ai, scan_ai);
+	if (scan_ai->force_full_scan)
+		err = UBI_NO_FASTMAP;
+	else
+		err = ubi_scan_fastmap(ubi, *ai, scan_ai);
+
 	if (err) {
 		/*
 		 * Didn't attach via fastmap, do a full scan but reuse what
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 051976caf8f0..05d9ec66437c 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -711,6 +711,8 @@ struct ubi_ainf_volume {
  * @vols_found: number of volumes found
  * @highest_vol_id: highest volume ID
  * @is_empty: flag indicating whether the MTD device is empty or not
+ * @force_full_scan: flag indicating whether we need to do a full scan and drop
+		     all existing Fastmap data structures
  * @min_ec: lowest erase counter value
  * @max_ec: highest erase counter value
  * @max_sqnum: highest sequence number value
@@ -738,6 +740,7 @@ struct ubi_attach_info {
 	int vols_found;
 	int highest_vol_id;
 	int is_empty;
+	int force_full_scan;
 	int min_ec;
 	int max_ec;
 	unsigned long long max_sqnum;

From 000b4c28bb28d471662a7d8fed80c9f511afe4cf Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 24 Aug 2016 14:36:14 +0200
Subject: [PATCH 105/519] ubi: Fix races around ubi_refill_pools()

commit 2e8f08deabbc7eefe4c5838aaa6aa9a23a8acf2e upstream.

When writing a new Fastmap the first thing that happens
is refilling the pools in memory.
At this stage it is possible that new PEBs from the new pools
get already claimed and written with data.
If this happens before the new Fastmap data structure hits the
flash and we face power cut the freshly written PEB will not
scanned and unnoticed.

Solve the issue by locking the pools until Fastmap is written.

Cc: <stable@vger.kernel.org>
Fixes: dbb7d2a88d ("UBI: Add fastmap core")
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/eba.c        |  4 ++--
 drivers/mtd/ubi/fastmap-wl.c |  6 ++++--
 drivers/mtd/ubi/fastmap.c    | 14 ++++++++++----
 drivers/mtd/ubi/wl.c         | 20 ++++++++++++++------
 4 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index c4a25c858c07..03cf0553ec1b 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1178,6 +1178,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	struct ubi_volume *vol;
 	uint32_t crc;
 
+	ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));
+
 	vol_id = be32_to_cpu(vid_hdr->vol_id);
 	lnum = be32_to_cpu(vid_hdr->lnum);
 
@@ -1346,9 +1348,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	}
 
 	ubi_assert(vol->eba_tbl[lnum] == from);
-	down_read(&ubi->fm_eba_sem);
 	vol->eba_tbl[lnum] = to;
-	up_read(&ubi->fm_eba_sem);
 
 out_unlock_buf:
 	mutex_unlock(&ubi->buf_mutex);
diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
index ed62f1efe6eb..69dd21679a30 100644
--- a/drivers/mtd/ubi/fastmap-wl.c
+++ b/drivers/mtd/ubi/fastmap-wl.c
@@ -262,6 +262,8 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
 	struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
 	int pnum;
 
+	ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));
+
 	if (pool->used == pool->size) {
 		/* We cannot update the fastmap here because this
 		 * function is called in atomic context.
@@ -303,7 +305,7 @@ int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
 
 	wrk->anchor = 1;
 	wrk->func = &wear_leveling_worker;
-	schedule_ubi_work(ubi, wrk);
+	__schedule_ubi_work(ubi, wrk);
 	return 0;
 }
 
@@ -344,7 +346,7 @@ int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
 	spin_unlock(&ubi->wl_lock);
 
 	vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID;
-	return schedule_erase(ubi, e, vol_id, lnum, torture);
+	return schedule_erase(ubi, e, vol_id, lnum, torture, true);
 }
 
 /**
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index e724a363cef3..c5477299b66b 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -1514,22 +1514,30 @@ int ubi_update_fastmap(struct ubi_device *ubi)
 	struct ubi_wl_entry *tmp_e;
 
 	down_write(&ubi->fm_protect);
+	down_write(&ubi->work_sem);
+	down_write(&ubi->fm_eba_sem);
 
 	ubi_refill_pools(ubi);
 
 	if (ubi->ro_mode || ubi->fm_disabled) {
+		up_write(&ubi->fm_eba_sem);
+		up_write(&ubi->work_sem);
 		up_write(&ubi->fm_protect);
 		return 0;
 	}
 
 	ret = ubi_ensure_anchor_pebs(ubi);
 	if (ret) {
+		up_write(&ubi->fm_eba_sem);
+		up_write(&ubi->work_sem);
 		up_write(&ubi->fm_protect);
 		return ret;
 	}
 
 	new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL);
 	if (!new_fm) {
+		up_write(&ubi->fm_eba_sem);
+		up_write(&ubi->work_sem);
 		up_write(&ubi->fm_protect);
 		return -ENOMEM;
 	}
@@ -1638,16 +1646,14 @@ int ubi_update_fastmap(struct ubi_device *ubi)
 		new_fm->e[0] = tmp_e;
 	}
 
-	down_write(&ubi->work_sem);
-	down_write(&ubi->fm_eba_sem);
 	ret = ubi_write_fastmap(ubi, new_fm);
-	up_write(&ubi->fm_eba_sem);
-	up_write(&ubi->work_sem);
 
 	if (ret)
 		goto err;
 
 out_unlock:
+	up_write(&ubi->fm_eba_sem);
+	up_write(&ubi->work_sem);
 	up_write(&ubi->fm_protect);
 	kfree(old_fm);
 	return ret;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 7eb1f3fb16e2..d49e96f94f46 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -580,7 +580,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
  * failure.
  */
 static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
-			  int vol_id, int lnum, int torture)
+			  int vol_id, int lnum, int torture, bool nested)
 {
 	struct ubi_work *wl_wrk;
 
@@ -599,7 +599,10 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 	wl_wrk->lnum = lnum;
 	wl_wrk->torture = torture;
 
-	schedule_ubi_work(ubi, wl_wrk);
+	if (nested)
+		__schedule_ubi_work(ubi, wl_wrk);
+	else
+		schedule_ubi_work(ubi, wl_wrk);
 	return 0;
 }
 
@@ -658,6 +661,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	if (!vid_hdr)
 		return -ENOMEM;
 
+	down_read(&ubi->fm_eba_sem);
 	mutex_lock(&ubi->move_mutex);
 	spin_lock(&ubi->wl_lock);
 	ubi_assert(!ubi->move_from && !ubi->move_to);
@@ -884,6 +888,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 
 	dbg_wl("done");
 	mutex_unlock(&ubi->move_mutex);
+	up_read(&ubi->fm_eba_sem);
 	return 0;
 
 	/*
@@ -925,6 +930,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	}
 
 	mutex_unlock(&ubi->move_mutex);
+	up_read(&ubi->fm_eba_sem);
 	return 0;
 
 out_error:
@@ -946,6 +952,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 out_ro:
 	ubi_ro_mode(ubi);
 	mutex_unlock(&ubi->move_mutex);
+	up_read(&ubi->fm_eba_sem);
 	ubi_assert(err != 0);
 	return err < 0 ? err : -EIO;
 
@@ -953,6 +960,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 	mutex_unlock(&ubi->move_mutex);
+	up_read(&ubi->fm_eba_sem);
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	return 0;
 }
@@ -1075,7 +1083,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
 		int err1;
 
 		/* Re-schedule the LEB for erasure */
-		err1 = schedule_erase(ubi, e, vol_id, lnum, 0);
+		err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
 		if (err1) {
 			wl_entry_destroy(ubi, e);
 			err = err1;
@@ -1256,7 +1264,7 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
 	}
 	spin_unlock(&ubi->wl_lock);
 
-	err = schedule_erase(ubi, e, vol_id, lnum, torture);
+	err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
 	if (err) {
 		spin_lock(&ubi->wl_lock);
 		wl_tree_add(e, &ubi->used);
@@ -1544,7 +1552,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 		e->pnum = aeb->pnum;
 		e->ec = aeb->ec;
 		ubi->lookuptbl[e->pnum] = e;
-		if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
+		if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) {
 			wl_entry_destroy(ubi, e);
 			goto out_free;
 		}
@@ -1624,7 +1632,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 			e->ec = aeb->ec;
 			ubi_assert(!ubi->lookuptbl[e->pnum]);
 			ubi->lookuptbl[e->pnum] = e;
-			if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
+			if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) {
 				wl_entry_destroy(ubi, e);
 				goto out_free;
 			}

From a5f958c4eadb8c9214c75b69330d4b5aa03d16e6 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 24 Aug 2016 14:36:15 +0200
Subject: [PATCH 106/519] ubi: Fix Fastmap's update_vol()

commit f7d11b33d4e8cedf19367c09b891bbc705163976 upstream.

Usually Fastmap is free to consider every PEB in one of the pools
as newer than the existing PEB. Since PEBs in a pool are by definition
newer than everything else.
But update_vol() missed the case that a pool can contain more than
one candidate.

Cc: <stable@vger.kernel.org>
Fixes: dbb7d2a88d ("UBI: Add fastmap core")
Signed-off-by: Richard Weinberger <richard@nod.at>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/fastmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index c5477299b66b..72e89b352034 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -326,6 +326,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai,
 			aeb->pnum = new_aeb->pnum;
 			aeb->copy_flag = new_vh->copy_flag;
 			aeb->scrub = new_aeb->scrub;
+			aeb->sqnum = new_aeb->sqnum;
 			kmem_cache_free(ai->aeb_slab_cache, new_aeb);
 
 		/* new_aeb is older */

From f891ee97d9df8407ba1a46f9a7b89d8d57a70b7a Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 5 Dec 2017 16:01:20 +0100
Subject: [PATCH 107/519] ubi: fastmap: Erase outdated anchor PEBs during
 attach

commit f78e5623f45bab2b726eec29dc5cefbbab2d0b1c upstream.

The fastmap update code might erase the current fastmap anchor PEB
in case it doesn't find any new free PEB. When a power cut happens
in this situation we must not have any outdated fastmap anchor PEB
on the device, because that would be used to attach during next
boot.
The easiest way to make that sure is to erase all outdated fastmap
anchor PEBs synchronously during attach.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Richard Weinberger <richard@nod.at>
Fixes: dbb7d2a88d2a ("UBI: Add fastmap core")
Cc: <stable@vger.kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/wl.c | 77 ++++++++++++++++++++++++++++++++------------
 1 file changed, 57 insertions(+), 20 deletions(-)

diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index d49e96f94f46..b3c1b8106a68 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1508,6 +1508,46 @@ static void shutdown_work(struct ubi_device *ubi)
 	}
 }
 
+/**
+ * erase_aeb - erase a PEB given in UBI attach info PEB
+ * @ubi: UBI device description object
+ * @aeb: UBI attach info PEB
+ * @sync: If true, erase synchronously. Otherwise schedule for erasure
+ */
+static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync)
+{
+	struct ubi_wl_entry *e;
+	int err;
+
+	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+
+	e->pnum = aeb->pnum;
+	e->ec = aeb->ec;
+	ubi->lookuptbl[e->pnum] = e;
+
+	if (sync) {
+		err = sync_erase(ubi, e, false);
+		if (err)
+			goto out_free;
+
+		wl_tree_add(e, &ubi->free);
+		ubi->free_count++;
+	} else {
+		err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
+		if (err)
+			goto out_free;
+	}
+
+	return 0;
+
+out_free:
+	wl_entry_destroy(ubi, e);
+
+	return err;
+}
+
 /**
  * ubi_wl_init - initialize the WL sub-system using attaching information.
  * @ubi: UBI device description object
@@ -1545,18 +1585,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 	list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
 		cond_resched();
 
-		e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
-		if (!e)
+		err = erase_aeb(ubi, aeb, false);
+		if (err)
 			goto out_free;
 
-		e->pnum = aeb->pnum;
-		e->ec = aeb->ec;
-		ubi->lookuptbl[e->pnum] = e;
-		if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) {
-			wl_entry_destroy(ubi, e);
-			goto out_free;
-		}
-
 		found_pebs++;
 	}
 
@@ -1615,6 +1647,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 			ubi_assert(!ubi->lookuptbl[e->pnum]);
 			ubi->lookuptbl[e->pnum] = e;
 		} else {
+			bool sync = false;
+
 			/*
 			 * Usually old Fastmap PEBs are scheduled for erasure
 			 * and we don't have to care about them but if we face
@@ -1624,18 +1658,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 			if (ubi->lookuptbl[aeb->pnum])
 				continue;
 
-			e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
-			if (!e)
-				goto out_free;
+			/*
+			 * The fastmap update code might not find a free PEB for
+			 * writing the fastmap anchor to and then reuses the
+			 * current fastmap anchor PEB. When this PEB gets erased
+			 * and a power cut happens before it is written again we
+			 * must make sure that the fastmap attach code doesn't
+			 * find any outdated fastmap anchors, hence we erase the
+			 * outdated fastmap anchor PEBs synchronously here.
+			 */
+			if (aeb->vol_id == UBI_FM_SB_VOLUME_ID)
+				sync = true;
 
-			e->pnum = aeb->pnum;
-			e->ec = aeb->ec;
-			ubi_assert(!ubi->lookuptbl[e->pnum]);
-			ubi->lookuptbl[e->pnum] = e;
-			if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) {
-				wl_entry_destroy(ubi, e);
+			err = erase_aeb(ubi, aeb, sync);
+			if (err)
 				goto out_free;
-			}
 		}
 
 		found_pebs++;

From 762b585c492fedda1b0bc4c6d0a867307bf7cd0f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 25 Jul 2018 10:18:33 +0200
Subject: [PATCH 108/519] Linux 4.4.144

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 54690fee0485..63f3e2438a26 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 143
+SUBLEVEL = 144
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 54a634c43ae75031c03126957edf667dba1439ec Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 20 Jul 2018 13:58:21 +0200
Subject: [PATCH 109/519] MIPS: ath79: fix register address in
 ath79_ddr_wb_flush()

commit bc88ad2efd11f29e00a4fd60fcd1887abfe76833 upstream.

ath79_ddr_wb_flush_base has the type void __iomem *, so register offsets
need to be a multiple of 4 in order to access the intended register.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: 24b0e3e84fbf ("MIPS: ath79: Improve the DDR controller interface")
Patchwork: https://patchwork.linux-mips.org/patch/19912/
Cc: Alban Bedel <albeu@free.fr>
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 4.2+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/ath79/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c
index 8ae4067a5eda..40ecb6e700cd 100644
--- a/arch/mips/ath79/common.c
+++ b/arch/mips/ath79/common.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init);
 
 void ath79_ddr_wb_flush(u32 reg)
 {
-	void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg;
+	void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4);
 
 	/* Flush the DDR write buffer. */
 	__raw_writel(0x1, flush_reg);

From 48f41c0c5781e3450830f8ec68513bfa18a5451b Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 23 Jul 2018 16:50:48 +0200
Subject: [PATCH 110/519] ip: hash fragments consistently

[ Upstream commit 3dd1c9a1270736029ffca670e9bd0265f4120600 ]

The skb hash for locally generated ip[v6] fragments belonging
to the same datagram can vary in several circumstances:
* for connected UDP[v6] sockets, the first fragment get its hash
  via set_owner_w()/skb_set_hash_from_sk()
* for unconnected IPv6 UDPv6 sockets, the first fragment can get
  its hash via ip6_make_flowlabel()/skb_get_hash_flowi6(), if
  auto_flowlabel is enabled

For the following frags the hash is usually computed via
skb_get_hash().
The above can cause OoO for unconnected IPv6 UDPv6 socket: in that
scenario the egress tx queue can be selected on a per packet basis
via the skb hash.
It may also fool flow-oriented schedulers to place fragments belonging
to the same datagram in different flows.

Fix the issue by copying the skb hash from the head frag into
the others at fragmentation time.

Before this commit:
perf probe -a "dev_queue_xmit skb skb->hash skb->l4_hash:b1@0/8 skb->sw_hash:b1@1/8"
netperf -H $IPV4 -t UDP_STREAM -l 5 -- -m 2000 -n &
perf record -e probe:dev_queue_xmit -e probe:skb_set_owner_w -a sleep 0.1
perf script
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=3713014309 l4_hash=1 sw_hash=0
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=0 l4_hash=0 sw_hash=0

After this commit:
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0
probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0

Fixes: b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit")
Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_output.c  | 2 ++
 net/ipv6/ip6_output.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 10286432f684..c11bb6d2d00a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -480,6 +480,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->dev = from->dev;
 	to->mark = from->mark;
 
+	skb_copy_hash(to, from);
+
 	/* Copy the flags to each fragment. */
 	IPCB(to)->flags = IPCB(from)->flags;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 74786783834b..0feede45bd28 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -559,6 +559,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->dev = from->dev;
 	to->mark = from->mark;
 
+	skb_copy_hash(to, from);
+
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif

From 73dad08709fee646875f5a81c07b05fb54582732 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 24 Jul 2018 14:27:55 +0300
Subject: [PATCH 111/519] net/mlx4_core: Save the qpn from the input modifier
 in RST2INIT wrapper

[ Upstream commit 958c696f5a7274d9447a458ad7aa70719b29a50a ]

Function mlx4_RST2INIT_QP_wrapper saved the qp number passed in the qp
context, rather than the one passed in the input modifier.

However, the qp number in the qp context is not defined as a
required parameter by the FW. Therefore, drivers may choose to not
specify the qp number in the qp context for the reset-to-init transition.

Thus, we must save the qp number passed in the command input modifier --
which is always present. (This saved qp number is used as the input
modifier for command 2RST_QP when a slave's qp's are destroyed).

Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index e3080fbd9d00..7911dc3da98e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2891,7 +2891,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 	u32 srqn = qp_get_srqn(qpc) & 0xffffff;
 	int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
 	struct res_srq *srq;
-	int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
+	int local_qpn = vhcr->in_modifier & 0xffffff;
 
 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
 	if (err)

From b04c9a08710606e2fa0a16f24541a270c00ef58c Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Fri, 20 Jul 2018 13:21:01 -0700
Subject: [PATCH 112/519] rtnetlink: add rtnl_link_state check in
 rtnl_configure_link

[ Upstream commit 5025f7f7d506fba9b39e7fe8ca10f6f34cb9bc2d ]

rtnl_configure_link sets dev->rtnl_link_state to
RTNL_LINK_INITIALIZED and unconditionally calls
__dev_notify_flags to notify user-space of dev flags.

current call sequence for rtnl_configure_link
rtnetlink_newlink
    rtnl_link_ops->newlink
    rtnl_configure_link (unconditionally notifies userspace of
                         default and new dev flags)

If a newlink handler wants to call rtnl_configure_link
early, we will end up with duplicate notifications to
user-space.

This patch fixes rtnl_configure_link to check rtnl_link_state
and call __dev_notify_flags with gchanges = 0 if already
RTNL_LINK_INITIALIZED.

Later in the series, this patch will help the following sequence
where a driver implementing newlink can call rtnl_configure_link
to initialize the link early.

makes the following call sequence work:
rtnetlink_newlink
    rtnl_link_ops->newlink (vxlan) -> rtnl_configure_link (initializes
                                                link and notifies
                                                user-space of default
                                                dev flags)
    rtnl_configure_link (updates dev flags if requested by user ifm
                         and notifies user-space of new dev flags)

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/rtnetlink.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2017ffa5197a..96c9c0f0905a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2087,9 +2087,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 			return err;
 	}
 
-	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
-	__dev_notify_flags(dev, old_flags, ~0U);
+	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+		__dev_notify_flags(dev, old_flags, 0U);
+	} else {
+		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+		__dev_notify_flags(dev, old_flags, ~0U);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);

From 500e03f463835e74c75890d56d9a7ab63755aa2d Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jul 2018 06:04:52 -0700
Subject: [PATCH 113/519] tcp: fix dctcp delayed ACK schedule

[ Upstream commit b0c05d0e99d98d7f0cd41efc1eeec94efdc3325d ]

Previously, when a data segment was sent an ACK was piggybacked
on the data segment without generating a CA_EVENT_NON_DELAYED_ACK
event to notify congestion control modules. So the DCTCP
ca->delayed_ack_reserved flag could incorrectly stay set when
in fact there were no delayed ACKs being reserved. This could result
in sending a special ECN notification ACK that carries an older
ACK sequence, when in fact there was no need for such an ACK.
DCTCP keeps track of the delayed ACK status with its own separate
state ca->delayed_ack_reserved. Previously it may accidentally cancel
the delayed ACK without updating this field upon sending a special
ACK that carries a older ACK sequence. This inconsistency would
lead to DCTCP receiver never acknowledging the latest data until the
sender times out and retry in some cases.

Packetdrill script (provided by Larry Brakmo)

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 2:3(1) ack 2001

0.200 < [ect0] . 2001:3001(1000) ack 3 win 257
0.200 < [ect0] . 3001:4001(1000) ack 3 win 257
0.200 > [ect01] . 3:3(0) ack 4001

0.210 < [ce] P. 4001:4501(500) ack 3 win 257

+0.001 read(4, ..., 4500) = 4500
+0 write(4, ..., 1) = 1
+0 > [ect01] PE. 3:4(1) ack 4501

+0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257
// Previously the ACK sequence below would be 4501, causing a long RTO
+0.040~+0.045 > [ect01] . 4:4(0) ack 5501   // delayed ack

+0.311 < [ect0] . 5501:6501(1000) ack 4 win 257  // More data
+0 > [ect01] . 4:4(0) ack 6501     // now acks everything

+0.500 < F. 9501:9501(0) ack 4 win 257

Reported-by: Larry Brakmo <brakmo@fb.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_dctcp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 55d7da1d2ce9..e772679a5ded 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -134,7 +134,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	/* State has changed from CE=0 to CE=1 and delayed
 	 * ACK has not sent yet.
 	 */
-	if (!ca->ce_state && ca->delayed_ack_reserved) {
+	if (!ca->ce_state &&
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
 		u32 tmp_rcv_nxt;
 
 		/* Save current rcv_nxt. */
@@ -164,7 +165,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	/* State has changed from CE=1 to CE=0 and delayed
 	 * ACK has not sent yet.
 	 */
-	if (ca->ce_state && ca->delayed_ack_reserved) {
+	if (ca->ce_state &&
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
 		u32 tmp_rcv_nxt;
 
 		/* Save current rcv_nxt. */

From 17fea38e74ab24afb06970bbd9dc52db11a8034b Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:34 -0700
Subject: [PATCH 114/519] tcp: helpers to send special DCTCP ack

[ Upstream commit 2987babb6982306509380fc11b450227a844493b ]

Refactor and create helpers to send the special ACK in DCTCP.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2854db094864..cbba484d4f5e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -901,8 +901,8 @@ void tcp_wfree(struct sk_buff *skb)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
-			    gfp_t gfp_mask)
+static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
+			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet;
@@ -962,7 +962,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->source		= inet->inet_sport;
 	th->dest		= inet->inet_dport;
 	th->seq			= htonl(tcb->seq);
-	th->ack_seq		= htonl(tp->rcv_nxt);
+	th->ack_seq		= htonl(rcv_nxt);
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
 					tcb->tcp_flags);
 
@@ -1036,6 +1036,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	return net_xmit_eval(err);
 }
 
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+			    gfp_t gfp_mask)
+{
+	return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
+				  tcp_sk(sk)->rcv_nxt);
+}
+
 /* This routine just queues the buffer for sending.
  *
  * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -3354,7 +3361,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 }
 
 /* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 {
 	struct sk_buff *buff;
 
@@ -3391,7 +3398,12 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Send it off, this clears delayed acks for us. */
 	skb_mstamp_get(&buff->skb_mstamp);
-	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
+	__tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
+}
+
+void tcp_send_ack(struct sock *sk)
+{
+	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
 EXPORT_SYMBOL_GPL(tcp_send_ack);
 

From 0b1d40e9e7738e3396ce414b1c62b911c285dfa3 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:35 -0700
Subject: [PATCH 115/519] tcp: do not cancel delay-AcK on DCTCP special ACK

[ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ]

Currently when a DCTCP receiver delays an ACK and receive a
data packet with a different CE mark from the previous one's, it
sends two immediate ACKs acking previous and latest sequences
respectly (for ECN accounting).

Previously sending the first ACK may mark off the delayed ACK timer
(tcp_event_ack_sent). This may subsequently prevent sending the
second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
The culprit is that tcp_send_ack() assumes it always acknowleges
the latest sequence, which is not true for the first special ACK.

The fix is to not make the assumption in tcp_send_ack and check the
actual ack sequence before cancelling the delayed ACK. Further it's
safer to pass the ack sequence number as a local variable into
tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
future bugs like this.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp_dctcp.c  | 34 ++++------------------------------
 net/ipv4/tcp_output.c | 11 ++++++++---
 3 files changed, 13 insertions(+), 33 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a3696b778757..24ba8f005f01 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -559,6 +559,7 @@ void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e772679a5ded..c48badee1cfa 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -135,21 +135,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	 * ACK has not sent yet.
 	 */
 	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=0. */
-		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -166,21 +153,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	 * ACK has not sent yet.
 	 */
 	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=1. */
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cbba484d4f5e..6fa749ce231f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -177,8 +177,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 }
 
 /* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+				      u32 rcv_nxt)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (unlikely(rcv_nxt != tp->rcv_nxt))
+		return;  /* Special ACK sent by DCTCP to reflect ECN */
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -1005,7 +1010,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	icsk->icsk_af_ops->send_check(sk, skb);
 
 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
-		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
 
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, sk);
@@ -3400,12 +3405,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 	skb_mstamp_get(&buff->skb_mstamp);
 	__tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
 }
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
 
 void tcp_send_ack(struct sock *sk)
 {
 	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
-EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.

From 255924ea891f647451af3acbc40a3730dcb3255e Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:36 -0700
Subject: [PATCH 116/519] tcp: do not delay ACK in DCTCP upon CE status change

[ Upstream commit a0496ef2c23b3b180902dd185d0d63ccbc624cf8 ]

Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change
has to be sent immediately so the sender can respond quickly:

""" When receiving packets, the CE codepoint MUST be processed as follows:

   1.  If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to
       true and send an immediate ACK.

   2.  If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE
       to false and send an immediate ACK.
"""

Previously DCTCP implementation may continue to delay the ACK. This
patch fixes that to implement the RFC by forcing an immediate ACK.

Tested with this packetdrill script provided by Larry Brakmo

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
   +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
+0.005 < [ce] . 2001:3001(1000) ack 2 win 257

+0.000 > [ect01] . 2:2(0) ack 2001
// Previously the ACK below would be delayed by 40ms
+0.000 > [ect01] E. 2:2(0) ack 3001

+0.500 < F. 9501:9501(0) ack 4 win 257

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h    |  1 +
 net/ipv4/tcp_dctcp.c | 30 ++++++++++++++++++------------
 net/ipv4/tcp_input.c |  3 ++-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 24ba8f005f01..65babd8a682d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -376,6 +376,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
+void tcp_enter_quickack_mode(struct sock *sk);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index c48badee1cfa..e63b764e55ea 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -131,12 +131,15 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=0 to CE=1 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+	if (!ca->ce_state) {
+		/* State has changed from CE=0 to CE=1, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk);
+	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -149,12 +152,15 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=1 to CE=0 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+	if (ca->ce_state) {
+		/* State has changed from CE=1 to CE=0, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk);
+	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4350ee058441..782fb6dbf636 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -187,13 +187,14 @@ static void tcp_incr_quickack(struct sock *sk)
 		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk)
+void tcp_enter_quickack_mode(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	tcp_incr_quickack(sk);
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.

From 5fbec4801264cb3279ef6ac9c70bcbe2aaef89d5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:18 -0700
Subject: [PATCH 117/519] tcp: avoid collapses in tcp_prune_queue() if possible

[ Upstream commit f4a3313d8e2ca9fd8d8f45e40a2903ba782607e7 ]

Right after a TCP flow is created, receiving tiny out of order
packets allways hit the condition :

if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
	tcp_clamp_window(sk);

tcp_clamp_window() increases sk_rcvbuf to match sk_rmem_alloc
(guarded by tcp_rmem[2])

Calling tcp_collapse_ofo_queue() in this case is not useful,
and offers a O(N^2) surface attack to malicious peers.

Better not attempt anything before full queue capacity is reached,
forcing attacker to spend lots of resource and allow us to more
easily detect the abuse.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 782fb6dbf636..8088e98696aa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4875,6 +4875,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue,

From dc6ae4dffd656811dee7151b19545e4cd839d378 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jul 2018 09:28:19 -0700
Subject: [PATCH 118/519] tcp: detect malicious patterns in
 tcp_collapse_ofo_queue()

[ Upstream commit 3d4bf93ac12003f9b8e1e2de37fe27983deebdcf ]

In case an attacker feeds tiny packets completely out of order,
tcp_collapse_ofo_queue() might scan the whole rb-tree, performing
expensive copies, but not changing socket memory usage at all.

1) Do not attempt to collapse tiny skbs.
2) Add logic to exit early when too many tiny skbs are detected.

We prefer not doing aggressive collapsing (which copies packets)
for pathological flows, and revert to tcp_prune_ofo_queue() which
will be less expensive.

In the future, we might add the possibility of terminating flows
that are proven to be malicious.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8088e98696aa..5c645069a09a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4789,6 +4789,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
 	struct sk_buff *head;
 	u32 start, end;
@@ -4798,6 +4799,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 	head = skb;
 
 	for (;;) {
@@ -4812,14 +4814,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
+
 			head = skb;
 			if (!skb)
 				break;
 			/* Start new segment */
 			start = TCP_SKB_CB(skb)->seq;
 			end = TCP_SKB_CB(skb)->end_seq;
+			range_truesize = skb->truesize;
 		} else {
 			if (before(TCP_SKB_CB(skb)->seq, start))
 				start = TCP_SKB_CB(skb)->seq;

From a77bf88daa402cb52729fe4e46e1b7ce58f1f8ad Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 23 Jul 2018 19:36:48 -0400
Subject: [PATCH 119/519] ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull

[ Upstream commit 2efd4fca703a6707cad16ab486eaab8fc7f0fd49 ]

Syzbot reported a read beyond the end of the skb head when returning
IPV6_ORIGDSTADDR:

  BUG: KMSAN: kernel-infoleak in put_cmsg+0x5ef/0x860 net/core/scm.c:242
  CPU: 0 PID: 4501 Comm: syz-executor128 Not tainted 4.17.0+ #9
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
  Google 01/01/2011
  Call Trace:
    __dump_stack lib/dump_stack.c:77 [inline]
    dump_stack+0x185/0x1d0 lib/dump_stack.c:113
    kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1125
    kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1219
    kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1261
    copy_to_user include/linux/uaccess.h:184 [inline]
    put_cmsg+0x5ef/0x860 net/core/scm.c:242
    ip6_datagram_recv_specific_ctl+0x1cf3/0x1eb0 net/ipv6/datagram.c:719
    ip6_datagram_recv_ctl+0x41c/0x450 net/ipv6/datagram.c:733
    rawv6_recvmsg+0x10fb/0x1460 net/ipv6/raw.c:521
    [..]

This logic and its ipv4 counterpart read the destination port from
the packet at skb_transport_offset(skb) + 4.

With MSG_MORE and a local SOCK_RAW sender, syzbot was able to cook a
packet that stores headers exactly up to skb_transport_offset(skb) in
the head and the remainder in a frag.

Call pskb_may_pull before accessing the pointer to ensure that it lies
in skb head.

Link: http://lkml.kernel.org/r/CAF=yD-LEJwZj5a1-bAAj2Oy_hKmGygV6rsJ_WOrAYnv-fnayiQ@mail.gmail.com
Reported-by: syzbot+9adb4b567003cac781f0@syzkaller.appspotmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_sockglue.c | 7 +++++--
 net/ipv6/datagram.c    | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce9a7fbb7c5f..88426a6a7a85 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -135,15 +135,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 {
 	struct sockaddr_in sin;
 	const struct iphdr *iph = ip_hdr(skb);
-	__be16 *ports = (__be16 *)skb_transport_header(skb);
+	__be16 *ports;
+	int end;
 
-	if (skb_transport_offset(skb) + 4 > skb->len)
+	end = skb_transport_offset(skb) + 4;
+	if (end > 0 && !pskb_may_pull(skb, end))
 		return;
 
 	/* All current transport protocols have the port numbers in the
 	 * first four bytes of the transport header and this function is
 	 * written with this assumption in mind.
 	 */
+	ports = (__be16 *)skb_transport_header(skb);
 
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = iph->daddr;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cae37bfd12ab..9f6e57ded338 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -657,13 +657,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 	}
 	if (np->rxopt.bits.rxorigdstaddr) {
 		struct sockaddr_in6 sin6;
-		__be16 *ports = (__be16 *) skb_transport_header(skb);
+		__be16 *ports;
+		int end;
 
-		if (skb_transport_offset(skb) + 4 <= skb->len) {
+		end = skb_transport_offset(skb) + 4;
+		if (end <= 0 || pskb_may_pull(skb, end)) {
 			/* All current transport protocols have the port numbers in the
 			 * first four bytes of the transport header and this function is
 			 * written with this assumption in mind.
 			 */
+			ports = (__be16 *)skb_transport_header(skb);
 
 			sin6.sin6_family = AF_INET6;
 			sin6.sin6_addr = ipv6_hdr(skb)->daddr;

From 92197cdb913a3f130b70550c856afc27cf710ea9 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Tue, 10 Jul 2018 08:28:49 +0200
Subject: [PATCH 120/519] usb: cdc_acm: Add quirk for Castles VEGA3000

commit 1445cbe476fc3dd09c0b380b206526a49403c071 upstream.

The device (a POS terminal) implements CDC ACM, but has not union
descriptor.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Oliver Neukum <oneukum@suse.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7ed30d0b5273..a501f3ba6a3f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1771,6 +1771,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */
 	.driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */
 	},
+	{ USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */
+	.driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+	},
 
 	{ USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */
 	.driver_info = CLEAR_HALT_CONDITIONS,

From ff180bcc594676b355476237a382ea10c6492c34 Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Thu, 19 Jul 2018 14:39:37 -0500
Subject: [PATCH 121/519] usb: core: handle hub C_PORT_OVER_CURRENT condition

commit 249a32b7eeb3edb6897dd38f89651a62163ac4ed upstream.

Based on USB2.0 Spec Section 11.12.5,

  "If a hub has per-port power switching and per-port current limiting,
  an over-current on one port may still cause the power on another port
  to fall below specific minimums. In this case, the affected port is
  placed in the Power-Off state and C_PORT_OVER_CURRENT is set for the
  port, but PORT_OVER_CURRENT is not set."

so let's check C_PORT_OVER_CURRENT too for over current condition.

Fixes: 08d1dec6f405 ("usb:hub set hub->change_bits when over-current happens")
Cc: <stable@vger.kernel.org>
Tested-by: Alessandro Antenucci <antenucci@korg.it>
Signed-off-by: Bin Liu <b-liu@ti.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 4d86da0df131..93756664592a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1123,10 +1123,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 
 		if (!udev || udev->state == USB_STATE_NOTATTACHED) {
 			/* Tell hub_wq to disconnect the device or
-			 * check for a new connection
+			 * check for a new connection or over current condition.
+			 * Based on USB2.0 Spec Section 11.12.5,
+			 * C_PORT_OVER_CURRENT could be set while
+			 * PORT_OVER_CURRENT is not. So check for any of them.
 			 */
 			if (udev || (portstatus & USB_PORT_STAT_CONNECTION) ||
-			    (portstatus & USB_PORT_STAT_OVERCURRENT))
+			    (portstatus & USB_PORT_STAT_OVERCURRENT) ||
+			    (portchange & USB_PORT_STAT_C_OVERCURRENT))
 				set_bit(port1, hub->change_bits);
 
 		} else if (portstatus & USB_PORT_STAT_ENABLE) {

From c420866afc161203d17f1fcd965a27a61ef70dd4 Mon Sep 17 00:00:00 2001
From: Jerry Zhang <zhangjerry@google.com>
Date: Mon, 2 Jul 2018 12:48:08 -0700
Subject: [PATCH 122/519] usb: gadget: f_fs: Only return delayed status when
 len is 0

commit 4d644abf25698362bd33d17c9ddc8f7122c30f17 upstream.

Commit 1b9ba000 ("Allow function drivers to pause control
transfers") states that USB_GADGET_DELAYED_STATUS is only
supported if data phase is 0 bytes.

It seems that when the length is not 0 bytes, there is no
need to explicitly delay the data stage since the transfer
is not completed until the user responds. However, when the
length is 0, there is no data stage and the transfer is
finished once setup() returns, hence there is a need to
explicitly delay completion.

This manifests as the following bugs:

Prior to 946ef68ad4e4 ('Let setup() return
USB_GADGET_DELAYED_STATUS'), when setup is 0 bytes, ffs
would require user to queue a 0 byte request in order to
clear setup state. However, that 0 byte request was actually
not needed and would hang and cause errors in other setup
requests.

After the above commit, 0 byte setups work since the gadget
now accepts empty queues to ep0 to clear the delay, but all
other setups hang.

Fixes: 946ef68ad4e4 ("Let setup() return USB_GADGET_DELAYED_STATUS")
Signed-off-by: Jerry Zhang <zhangjerry@google.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 4191feb765b1..4800bb22cdd6 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3037,7 +3037,7 @@ static int ffs_func_setup(struct usb_function *f,
 	__ffs_event_add(ffs, FUNCTIONFS_SETUP);
 	spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
 
-	return USB_GADGET_DELAYED_STATUS;
+	return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0;
 }
 
 static void ffs_func_suspend(struct usb_function *f)

From 0b14de0538aaa6c9b8c5d90b29e0a8dd698ba918 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 10 Jul 2018 14:51:33 +0200
Subject: [PATCH 123/519] driver core: Partially revert "driver core: correct
 device's shutdown order"

commit 722e5f2b1eec7de61117b7c0a7914761e3da2eda upstream.

Commit 52cdbdd49853 (driver core: correct device's shutdown order)
introduced a regression by breaking device shutdown on some systems.

Namely, the devices_kset_move_last() call in really_probe() added by
that commit is a mistake as it may cause parents to follow children
in the devices_kset list which then causes shutdown to fail.  For
example, if a device has children before really_probe() is called
for it (which is not uncommon), that call will cause it to be
reordered after the children in the devices_kset list and the
ordering of that list will not reflect the correct device shutdown
order any more.

Also it causes the devices_kset list to be constantly reordered
until all drivers have been probed which is totally pointless
overhead in the majority of cases and it only covered an issue
with system shutdown, while system-wide suspend/resume potentially
had the same issue on the affected platforms (which was not covered).

Moreover, the shutdown issue originally addressed by the change in
really_probe() made by commit 52cdbdd49853 is not present in 4.18-rc
any more, since dra7 started to use the sdhci-omap driver which
doesn't disable any regulators during shutdown, so the really_probe()
part of commit 52cdbdd49853 can be safely reverted.  [The original
issue was related to the omap_hsmmc driver used by dra7 previously.]

For the above reasons, revert the really_probe() modifications made
by commit 52cdbdd49853.

The other code changes made by commit 52cdbdd49853 are useful and
they need not be reverted.

Fixes: 52cdbdd49853 (driver core: correct device's shutdown order)
Link: https://lore.kernel.org/lkml/CAFgQCTt7VfqM=UyCnvNFxrSw8Z6cUtAi3HUwR4_xPAc03SgHjQ@mail.gmail.com/
Reported-by: Pingfan Liu <kernelfans@gmail.com>
Tested-by: Pingfan Liu <kernelfans@gmail.com>
Reviewed-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a641cf3ccad6..1dffb018a7fe 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -304,14 +304,6 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 			goto probe_failed;
 	}
 
-	/*
-	 * Ensure devices are listed in devices_kset in correct order
-	 * It's important to move Dev to the end of devices_kset before
-	 * calling .probe, because it could be recursive and parent Dev
-	 * should always go first
-	 */
-	devices_kset_move_last(dev);
-
 	if (dev->bus->probe) {
 		ret = dev->bus->probe(dev);
 		if (ret)

From e4c557649fda285e512f28a771038180cdeff6cf Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Tue, 7 Feb 2017 17:01:14 +0200
Subject: [PATCH 124/519] can: xilinx_can: fix RX loop if RXNEMP is asserted
 without RXOK

commit 32852c561bffd613d4ed7ec464b1e03e1b7b6c5c upstream.

If the device gets into a state where RXNEMP (RX FIFO not empty)
interrupt is asserted without RXOK (new frame received successfully)
interrupt being asserted, xcan_rx_poll() will continue to try to clear
RXNEMP without actually reading frames from RX FIFO. If the RX FIFO is
not empty, the interrupt will not be cleared and napi_schedule() will
just be called again.

This situation can occur when:

(a) xcan_rx() returns without reading RX FIFO due to an error condition.
The code tries to clear both RXOK and RXNEMP but RXNEMP will not clear
due to a frame still being in the FIFO. The frame will never be read
from the FIFO as RXOK is no longer set.

(b) A frame is received between xcan_rx_poll() reading interrupt status
and clearing RXOK. RXOK will be cleared, but RXNEMP will again remain
set as the new message is still in the FIFO.

I'm able to trigger case (b) by flooding the bus with frames under load.

There does not seem to be any benefit in using both RXNEMP and RXOK in
the way the driver does, and the polling example in the reference manual
(UG585 v1.10 18.3.7 Read Messages from RxFIFO) also says that either
RXOK or RXNEMP can be used for detecting incoming messages.

Fix the issue and simplify the RX processing by only using RXNEMP
without RXOK.

Tested with the integrated CAN on Zynq-7000 SoC.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 51670b322409..727add0decdf 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -100,7 +100,7 @@ enum xcan_reg {
 #define XCAN_INTR_ALL		(XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
 				 XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
 				 XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
-				 XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK)
+				 XCAN_IXR_ARBLST_MASK)
 
 /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
 #define XCAN_BTR_SJW_SHIFT		7  /* Synchronous jump width */
@@ -708,15 +708,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 
 	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) {
-		if (isr & XCAN_IXR_RXOK_MASK) {
-			priv->write_reg(priv, XCAN_ICR_OFFSET,
-				XCAN_IXR_RXOK_MASK);
-			work_done += xcan_rx(ndev);
-		} else {
-			priv->write_reg(priv, XCAN_ICR_OFFSET,
-				XCAN_IXR_RXNEMP_MASK);
-			break;
-		}
+		work_done += xcan_rx(ndev);
 		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK);
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
@@ -727,7 +719,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 	if (work_done < quota) {
 		napi_complete(napi);
 		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-		ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK);
+		ier |= XCAN_IXR_RXNEMP_MASK;
 		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 	}
 	return work_done;
@@ -799,9 +791,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 	}
 
 	/* Check for the type of receive interrupt and Processing it */
-	if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) {
+	if (isr & XCAN_IXR_RXNEMP_MASK) {
 		ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-		ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK);
+		ier &= ~XCAN_IXR_RXNEMP_MASK;
 		priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 		napi_schedule(&priv->napi);
 	}

From 7e572a170fc30ce4e6bff79b2a54f445a88457ac Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Wed, 8 Feb 2017 13:13:40 +0200
Subject: [PATCH 125/519] can: xilinx_can: fix recovery from error states not
 being propagated

commit 877e0b75947e2c7acf5624331bb17ceb093c98ae upstream.

The xilinx_can driver contains no mechanism for propagating recovery
from CAN_STATE_ERROR_WARNING and CAN_STATE_ERROR_PASSIVE.

Add such a mechanism by factoring the handling of
XCAN_STATE_ERROR_PASSIVE and XCAN_STATE_ERROR_WARNING out of
xcan_err_interrupt and checking for recovery after RX and TX if the
interface is in one of those states.

Tested with the integrated CAN on Zynq-7000 SoC.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 155 ++++++++++++++++++++++++++++-------
 1 file changed, 127 insertions(+), 28 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 727add0decdf..b0b52bf276b0 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2012 - 2014 Xilinx, Inc.
  * Copyright (C) 2009 PetaLogix. All rights reserved.
+ * Copyright (C) 2017 Sandvik Mining and Construction Oy
  *
  * Description:
  * This driver is developed for Axi CAN IP and for Zynq CANPS Controller.
@@ -528,6 +529,123 @@ static int xcan_rx(struct net_device *ndev)
 	return 1;
 }
 
+/**
+ * xcan_current_error_state - Get current error state from HW
+ * @ndev:	Pointer to net_device structure
+ *
+ * Checks the current CAN error state from the HW. Note that this
+ * only checks for ERROR_PASSIVE and ERROR_WARNING.
+ *
+ * Return:
+ * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE
+ * otherwise.
+ */
+static enum can_state xcan_current_error_state(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 status = priv->read_reg(priv, XCAN_SR_OFFSET);
+
+	if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK)
+		return CAN_STATE_ERROR_PASSIVE;
+	else if (status & XCAN_SR_ERRWRN_MASK)
+		return CAN_STATE_ERROR_WARNING;
+	else
+		return CAN_STATE_ERROR_ACTIVE;
+}
+
+/**
+ * xcan_set_error_state - Set new CAN error state
+ * @ndev:	Pointer to net_device structure
+ * @new_state:	The new CAN state to be set
+ * @cf:		Error frame to be populated or NULL
+ *
+ * Set new CAN error state for the device, updating statistics and
+ * populating the error frame if given.
+ */
+static void xcan_set_error_state(struct net_device *ndev,
+				 enum can_state new_state,
+				 struct can_frame *cf)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET);
+	u32 txerr = ecr & XCAN_ECR_TEC_MASK;
+	u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT;
+
+	priv->can.state = new_state;
+
+	if (cf) {
+		cf->can_id |= CAN_ERR_CRTL;
+		cf->data[6] = txerr;
+		cf->data[7] = rxerr;
+	}
+
+	switch (new_state) {
+	case CAN_STATE_ERROR_PASSIVE:
+		priv->can.can_stats.error_passive++;
+		if (cf)
+			cf->data[1] = (rxerr > 127) ?
+					CAN_ERR_CRTL_RX_PASSIVE :
+					CAN_ERR_CRTL_TX_PASSIVE;
+		break;
+	case CAN_STATE_ERROR_WARNING:
+		priv->can.can_stats.error_warning++;
+		if (cf)
+			cf->data[1] |= (txerr > rxerr) ?
+					CAN_ERR_CRTL_TX_WARNING :
+					CAN_ERR_CRTL_RX_WARNING;
+		break;
+	case CAN_STATE_ERROR_ACTIVE:
+		if (cf)
+			cf->data[1] |= CAN_ERR_CRTL_ACTIVE;
+		break;
+	default:
+		/* non-ERROR states are handled elsewhere */
+		WARN_ON(1);
+		break;
+	}
+}
+
+/**
+ * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX
+ * @ndev:	Pointer to net_device structure
+ *
+ * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if
+ * the performed RX/TX has caused it to drop to a lesser state and set
+ * the interface state accordingly.
+ */
+static void xcan_update_error_state_after_rxtx(struct net_device *ndev)
+{
+	struct xcan_priv *priv = netdev_priv(ndev);
+	enum can_state old_state = priv->can.state;
+	enum can_state new_state;
+
+	/* changing error state due to successful frame RX/TX can only
+	 * occur from these states
+	 */
+	if (old_state != CAN_STATE_ERROR_WARNING &&
+	    old_state != CAN_STATE_ERROR_PASSIVE)
+		return;
+
+	new_state = xcan_current_error_state(ndev);
+
+	if (new_state != old_state) {
+		struct sk_buff *skb;
+		struct can_frame *cf;
+
+		skb = alloc_can_err_skb(ndev, &cf);
+
+		xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
+
+		if (skb) {
+			struct net_device_stats *stats = &ndev->stats;
+
+			stats->rx_packets++;
+			stats->rx_bytes += cf->can_dlc;
+			netif_rx(skb);
+		}
+	}
+}
+
 /**
  * xcan_err_interrupt - error frame Isr
  * @ndev:	net_device pointer
@@ -543,16 +661,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 	struct net_device_stats *stats = &ndev->stats;
 	struct can_frame *cf;
 	struct sk_buff *skb;
-	u32 err_status, status, txerr = 0, rxerr = 0;
+	u32 err_status;
 
 	skb = alloc_can_err_skb(ndev, &cf);
 
 	err_status = priv->read_reg(priv, XCAN_ESR_OFFSET);
 	priv->write_reg(priv, XCAN_ESR_OFFSET, err_status);
-	txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
-	rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
-			XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
-	status = priv->read_reg(priv, XCAN_SR_OFFSET);
 
 	if (isr & XCAN_IXR_BSOFF_MASK) {
 		priv->can.state = CAN_STATE_BUS_OFF;
@@ -562,28 +676,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 		can_bus_off(ndev);
 		if (skb)
 			cf->can_id |= CAN_ERR_BUSOFF;
-	} else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) {
-		priv->can.state = CAN_STATE_ERROR_PASSIVE;
-		priv->can.can_stats.error_passive++;
-		if (skb) {
-			cf->can_id |= CAN_ERR_CRTL;
-			cf->data[1] = (rxerr > 127) ?
-					CAN_ERR_CRTL_RX_PASSIVE :
-					CAN_ERR_CRTL_TX_PASSIVE;
-			cf->data[6] = txerr;
-			cf->data[7] = rxerr;
-		}
-	} else if (status & XCAN_SR_ERRWRN_MASK) {
-		priv->can.state = CAN_STATE_ERROR_WARNING;
-		priv->can.can_stats.error_warning++;
-		if (skb) {
-			cf->can_id |= CAN_ERR_CRTL;
-			cf->data[1] |= (txerr > rxerr) ?
-					CAN_ERR_CRTL_TX_WARNING :
-					CAN_ERR_CRTL_RX_WARNING;
-			cf->data[6] = txerr;
-			cf->data[7] = rxerr;
-		}
+	} else {
+		enum can_state new_state = xcan_current_error_state(ndev);
+
+		xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
 	}
 
 	/* Check for Arbitration lost interrupt */
@@ -713,8 +809,10 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
 
-	if (work_done)
+	if (work_done) {
 		can_led_event(ndev, CAN_LED_EVENT_RX);
+		xcan_update_error_state_after_rxtx(ndev);
+	}
 
 	if (work_done < quota) {
 		napi_complete(napi);
@@ -745,6 +843,7 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
 		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
 	can_led_event(ndev, CAN_LED_EVENT_TX);
+	xcan_update_error_state_after_rxtx(ndev);
 	netif_wake_queue(ndev);
 }
 

From d7dec444472632fcba1d4b3be0aac8d279a37cc4 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Tue, 7 Feb 2017 13:23:04 +0200
Subject: [PATCH 126/519] can: xilinx_can: fix device dropping off bus on RX
 overrun

commit 2574fe54515ed3487405de329e4e9f13d7098c10 upstream.

The xilinx_can driver performs a software reset when an RX overrun is
detected. This causes the device to enter Configuration mode where no
messages are received or transmitted.

The documentation does not mention any need to perform a reset on an RX
overrun, and testing by inducing an RX overflow also indicated that the
device continues to work just fine without a reset.

Remove the software reset.

Tested with the integrated CAN on Zynq-7000 SoC.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index b0b52bf276b0..e787116e9e82 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -695,7 +695,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
 	if (isr & XCAN_IXR_RXOFLW_MASK) {
 		stats->rx_over_errors++;
 		stats->rx_errors++;
-		priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK);
 		if (skb) {
 			cf->can_id |= CAN_ERR_CRTL;
 			cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;

From e5df9264649e13f0b6c2da9ce089a8ac41ef5121 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Thu, 23 Feb 2017 14:50:03 +0200
Subject: [PATCH 127/519] can: xilinx_can: keep only 1-2 frames in TX FIFO to
 fix TX accounting

commit 620050d9c2be15c47017ba95efe59e0832e99a56 upstream.

The xilinx_can driver assumes that the TXOK interrupt only clears after
it has been acknowledged as many times as there have been successfully
sent frames.

However, the documentation does not mention such behavior, instead
saying just that the interrupt is cleared when the clear bit is set.

Similarly, testing seems to also suggest that it is immediately cleared
regardless of the amount of frames having been sent. Performing some
heavy TX load and then going back to idle has the tx_head drifting
further away from tx_tail over time, steadily reducing the amount of
frames the driver keeps in the TX FIFO (but not to zero, as the TXOK
interrupt always frees up space for 1 frame from the driver's
perspective, so frames continue to be sent) and delaying the local echo
frames.

The TX FIFO tracking is also otherwise buggy as it does not account for
TX FIFO being cleared after software resets, causing
  BUG!, TX FIFO full when queue awake!
messages to be output.

There does not seem to be any way to accurately track the state of the
TX FIFO for local echo support while using the full TX FIFO.

The Zynq version of the HW (but not the soft-AXI version) has watermark
programming support and with it an additional TX-FIFO-empty interrupt
bit.

Modify the driver to only put 1 frame into TX FIFO at a time on soft-AXI
and 2 frames at a time on Zynq. On Zynq the TXFEMP interrupt bit is used
to detect whether 1 or 2 frames have been sent at interrupt processing
time.

Tested with the integrated CAN on Zynq-7000 SoC. The 1-frame-FIFO mode
was also tested.

An alternative way to solve this would be to drop local echo support but
keep using the full TX FIFO.

v2: Add FIFO space check before TX queue wake with locking to
synchronize with queue stop. This avoids waking the queue when xmit()
had just filled it.

v3: Keep local echo support and reduce the amount of frames in FIFO
instead as suggested by Marc Kleine-Budde.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 139 +++++++++++++++++++++++++++++++----
 1 file changed, 123 insertions(+), 16 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index e787116e9e82..09c9950a03a5 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -26,8 +26,10 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/skbuff.h>
+#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/can/dev.h>
@@ -118,6 +120,7 @@ enum xcan_reg {
 /**
  * struct xcan_priv - This definition define CAN driver instance
  * @can:			CAN private data structure.
+ * @tx_lock:			Lock for synchronizing TX interrupt handling
  * @tx_head:			Tx CAN packets ready to send on the queue
  * @tx_tail:			Tx CAN packets successfully sended on the queue
  * @tx_max:			Maximum number packets the driver can send
@@ -132,6 +135,7 @@ enum xcan_reg {
  */
 struct xcan_priv {
 	struct can_priv can;
+	spinlock_t tx_lock;
 	unsigned int tx_head;
 	unsigned int tx_tail;
 	unsigned int tx_max;
@@ -159,6 +163,11 @@ static const struct can_bittiming_const xcan_bittiming_const = {
 	.brp_inc = 1,
 };
 
+#define XCAN_CAP_WATERMARK	0x0001
+struct xcan_devtype_data {
+	unsigned int caps;
+};
+
 /**
  * xcan_write_reg_le - Write a value to the device register little endian
  * @priv:	Driver private data structure
@@ -238,6 +247,10 @@ static int set_reset_mode(struct net_device *ndev)
 		usleep_range(500, 10000);
 	}
 
+	/* reset clears FIFOs */
+	priv->tx_head = 0;
+	priv->tx_tail = 0;
+
 	return 0;
 }
 
@@ -392,6 +405,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct net_device_stats *stats = &ndev->stats;
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	u32 id, dlc, data[2] = {0, 0};
+	unsigned long flags;
 
 	if (can_dropped_invalid_skb(ndev, skb))
 		return NETDEV_TX_OK;
@@ -439,6 +453,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		data[1] = be32_to_cpup((__be32 *)(cf->data + 4));
 
 	can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+
 	priv->tx_head++;
 
 	/* Write the Frame to Xilinx CAN TX FIFO */
@@ -454,10 +471,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		stats->tx_bytes += cf->can_dlc;
 	}
 
+	/* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */
+	if (priv->tx_max > 1)
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK);
+
 	/* Check if the TX buffer is full */
 	if ((priv->tx_head - priv->tx_tail) == priv->tx_max)
 		netif_stop_queue(ndev);
 
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
 	return NETDEV_TX_OK;
 }
 
@@ -831,19 +854,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
 {
 	struct xcan_priv *priv = netdev_priv(ndev);
 	struct net_device_stats *stats = &ndev->stats;
+	unsigned int frames_in_fifo;
+	int frames_sent = 1; /* TXOK => at least 1 frame was sent */
+	unsigned long flags;
+	int retries = 0;
 
-	while ((priv->tx_head - priv->tx_tail > 0) &&
-			(isr & XCAN_IXR_TXOK_MASK)) {
+	/* Synchronize with xmit as we need to know the exact number
+	 * of frames in the FIFO to stay in sync due to the TXFEMP
+	 * handling.
+	 * This also prevents a race between netif_wake_queue() and
+	 * netif_stop_queue().
+	 */
+	spin_lock_irqsave(&priv->tx_lock, flags);
+
+	frames_in_fifo = priv->tx_head - priv->tx_tail;
+
+	if (WARN_ON_ONCE(frames_in_fifo == 0)) {
+		/* clear TXOK anyway to avoid getting back here */
 		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		return;
+	}
+
+	/* Check if 2 frames were sent (TXOK only means that at least 1
+	 * frame was sent).
+	 */
+	if (frames_in_fifo > 1) {
+		WARN_ON(frames_in_fifo > priv->tx_max);
+
+		/* Synchronize TXOK and isr so that after the loop:
+		 * (1) isr variable is up-to-date at least up to TXOK clear
+		 *     time. This avoids us clearing a TXOK of a second frame
+		 *     but not noticing that the FIFO is now empty and thus
+		 *     marking only a single frame as sent.
+		 * (2) No TXOK is left. Having one could mean leaving a
+		 *     stray TXOK as we might process the associated frame
+		 *     via TXFEMP handling as we read TXFEMP *after* TXOK
+		 *     clear to satisfy (1).
+		 */
+		while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) {
+			priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+			isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+		}
+
+		if (isr & XCAN_IXR_TXFEMP_MASK) {
+			/* nothing in FIFO anymore */
+			frames_sent = frames_in_fifo;
+		}
+	} else {
+		/* single frame in fifo, just clear TXOK */
+		priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK);
+	}
+
+	while (frames_sent--) {
 		can_get_echo_skb(ndev, priv->tx_tail %
 					priv->tx_max);
 		priv->tx_tail++;
 		stats->tx_packets++;
-		isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
 	}
+
+	netif_wake_queue(ndev);
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
 	can_led_event(ndev, CAN_LED_EVENT_TX);
 	xcan_update_error_state_after_rxtx(ndev);
-	netif_wake_queue(ndev);
 }
 
 /**
@@ -1120,6 +1195,18 @@ static int __maybe_unused xcan_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
 
+static const struct xcan_devtype_data xcan_zynq_data = {
+	.caps = XCAN_CAP_WATERMARK,
+};
+
+/* Match table for OF platform binding */
+static const struct of_device_id xcan_of_match[] = {
+	{ .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data },
+	{ .compatible = "xlnx,axi-can-1.00.a", },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(of, xcan_of_match);
+
 /**
  * xcan_probe - Platform registration call
  * @pdev:	Handle to the platform device structure
@@ -1134,8 +1221,10 @@ static int xcan_probe(struct platform_device *pdev)
 	struct resource *res; /* IO mem resources */
 	struct net_device *ndev;
 	struct xcan_priv *priv;
+	const struct of_device_id *of_id;
+	int caps = 0;
 	void __iomem *addr;
-	int ret, rx_max, tx_max;
+	int ret, rx_max, tx_max, tx_fifo_depth;
 
 	/* Get the virtual base address for the device */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1145,7 +1234,8 @@ static int xcan_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max);
+	ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth",
+				   &tx_fifo_depth);
 	if (ret < 0)
 		goto err;
 
@@ -1153,6 +1243,30 @@ static int xcan_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err;
 
+	of_id = of_match_device(xcan_of_match, &pdev->dev);
+	if (of_id) {
+		const struct xcan_devtype_data *devtype_data = of_id->data;
+
+		if (devtype_data)
+			caps = devtype_data->caps;
+	}
+
+	/* There is no way to directly figure out how many frames have been
+	 * sent when the TXOK interrupt is processed. If watermark programming
+	 * is supported, we can have 2 frames in the FIFO and use TXFEMP
+	 * to determine if 1 or 2 frames have been sent.
+	 * Theoretically we should be able to use TXFWMEMP to determine up
+	 * to 3 frames, but it seems that after putting a second frame in the
+	 * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less
+	 * than 2 frames in FIFO) is set anyway with no TXOK (a frame was
+	 * sent), which is not a sensible state - possibly TXFWMEMP is not
+	 * completely synchronized with the rest of the bits?
+	 */
+	if (caps & XCAN_CAP_WATERMARK)
+		tx_max = min(tx_fifo_depth, 2);
+	else
+		tx_max = 1;
+
 	/* Create a CAN device instance */
 	ndev = alloc_candev(sizeof(struct xcan_priv), tx_max);
 	if (!ndev)
@@ -1167,6 +1281,7 @@ static int xcan_probe(struct platform_device *pdev)
 					CAN_CTRLMODE_BERR_REPORTING;
 	priv->reg_base = addr;
 	priv->tx_max = tx_max;
+	spin_lock_init(&priv->tx_lock);
 
 	/* Get IRQ for the device */
 	ndev->irq = platform_get_irq(pdev, 0);
@@ -1234,9 +1349,9 @@ static int xcan_probe(struct platform_device *pdev)
 	devm_can_led_init(ndev);
 	clk_disable_unprepare(priv->bus_clk);
 	clk_disable_unprepare(priv->can_clk);
-	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n",
+	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n",
 			priv->reg_base, ndev->irq, priv->can.clock.freq,
-			priv->tx_max);
+			tx_fifo_depth, priv->tx_max);
 
 	return 0;
 
@@ -1272,14 +1387,6 @@ static int xcan_remove(struct platform_device *pdev)
 	return 0;
 }
 
-/* Match table for OF platform binding */
-static const struct of_device_id xcan_of_match[] = {
-	{ .compatible = "xlnx,zynq-can-1.0", },
-	{ .compatible = "xlnx,axi-can-1.00.a", },
-	{ /* end of list */ },
-};
-MODULE_DEVICE_TABLE(of, xcan_of_match);
-
 static struct platform_driver xcan_driver = {
 	.probe = xcan_probe,
 	.remove	= xcan_remove,

From f7efacee6f8aaa311726eed09470e970b59fd4e1 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Mon, 26 Feb 2018 14:39:59 +0200
Subject: [PATCH 128/519] can: xilinx_can: fix incorrect clear of non-processed
 interrupts

commit 2f4f0f338cf453bfcdbcf089e177c16f35f023c8 upstream.

xcan_interrupt() clears ERROR|RXOFLV|BSOFF|ARBLST interrupts if any of
them is asserted. This does not take into account that some of them
could have been asserted between interrupt status read and interrupt
clear, therefore clearing them without handling them.

Fix the code to only clear those interrupts that it knows are asserted
and therefore going to be processed in xcan_err_interrupt().

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 09c9950a03a5..a7286d72ed7b 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -937,6 +937,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 	struct net_device *ndev = (struct net_device *)dev_id;
 	struct xcan_priv *priv = netdev_priv(ndev);
 	u32 isr, ier;
+	u32 isr_errors;
 
 	/* Get the interrupt status from Xilinx CAN */
 	isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
@@ -955,11 +956,10 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 		xcan_tx_interrupt(ndev, isr);
 
 	/* Check for the type of error interrupt and Processing it */
-	if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
-			XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) {
-		priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK |
-				XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK |
-				XCAN_IXR_ARBLST_MASK));
+	isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+			    XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK);
+	if (isr_errors) {
+		priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors);
 		xcan_err_interrupt(ndev, isr);
 	}
 

From 264dc1588925388709eeddd51901042ca277c828 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Mon, 26 Feb 2018 14:27:13 +0200
Subject: [PATCH 129/519] can: xilinx_can: fix RX overflow interrupt not being
 enabled

commit 83997997252f5d3fc7f04abc24a89600c2b504ab upstream.

RX overflow interrupt (RXOFLW) is disabled even though xcan_interrupt()
processes it. This means that an RX overflow interrupt will only be
processed when another interrupt gets asserted (e.g. for RX/TX).

Fix that by enabling the RXOFLW interrupt.

Fixes: b1201e44f50b ("can: xilinx CAN controller support")
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/xilinx_can.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index a7286d72ed7b..700b98d9c250 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -103,7 +103,7 @@ enum xcan_reg {
 #define XCAN_INTR_ALL		(XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
 				 XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
 				 XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
-				 XCAN_IXR_ARBLST_MASK)
+				 XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK)
 
 /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
 #define XCAN_BTR_SJW_SHIFT		7  /* Synchronous jump width */

From d41d0fe374d4fc358cfdae80fd39fd98174d3da3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jul 2018 10:13:22 +0200
Subject: [PATCH 130/519] turn off -Wattribute-alias

Starting with gcc-8.1, we get a warning about all system call definitions,
which use an alias between functions with incompatible prototypes, e.g.:

In file included from ../mm/process_vm_access.c:19:
../include/linux/syscalls.h:211:18: warning: 'sys_process_vm_readv' alias between functions of incompatible types 'long int(pid_t,  const struct iovec *, long unsigned int,  const struct iovec *, long unsigned int,  long unsigned int)' {aka 'long int(int,  const struct iovec *, long unsigned int,  const struct iovec *, long unsigned int,  long unsigned int)'} and 'long int(long int,  long int,  long int,  long int,  long int,  long int)' [-Wattribute-alias]
  asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
                  ^~~
../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx'
  __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
  ^~~~~~~~~~~~~~~~~
../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx'
 #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~
../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6'
 SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,
 ^~~~~~~~~~~~~~~
../include/linux/syscalls.h:215:18: note: aliased declaration here
  asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
                  ^~~
../include/linux/syscalls.h:207:2: note: in expansion of macro '__SYSCALL_DEFINEx'
  __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
  ^~~~~~~~~~~~~~~~~
../include/linux/syscalls.h:201:36: note: in expansion of macro 'SYSCALL_DEFINEx'
 #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~
../mm/process_vm_access.c:300:1: note: in expansion of macro 'SYSCALL_DEFINE6'
 SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,

This is really noisy and does not indicate a real problem. In the latest
mainline kernel, this was addressed by commit bee20031772a ("disable
-Wattribute-alias warning for SYSCALL_DEFINEx()"), which seems too invasive
to backport.

This takes a much simpler approach and just disables the warning across the
kernel.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 63f3e2438a26..bee96eda883a 100644
--- a/Makefile
+++ b/Makefile
@@ -624,6 +624,7 @@ KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, format-truncation)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, format-overflow)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, int-in-bool-context)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, attribute-alias)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os

From 0ca85fc310e8c24cba10ed241a0188795e177683 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jul 2018 10:13:23 +0200
Subject: [PATCH 131/519] ARM: fix put_user() for gcc-8

Building kernels before linux-4.7 with gcc-8 results in many build failures
when gcc triggers a check that was meant to catch broken compilers:

/tmp/ccCGMQmS.s:648: Error: .err encountered

According to the discussion in the gcc bugzilla, a local "register
asm()" variable is still supposed to be the correct way to force an
inline assembly to use a particular register, but marking it 'const'
lets the compiler do optimizations that break that, i.e the compiler is
free to treat the variable as either 'const' or 'register' in that case.

Upstream commit 9f73bd8bb445 ("ARM: uaccess: remove put_user() code
duplication") fixed this problem in linux-4.8 as part of a larger change,
but seems a little too big to be backported to 4.4.

Let's take the simplest fix and change only the one broken line in the
same way as newer kernels.

Suggested-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85745
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86673
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 35c9db857ebe..cd8b589111ba 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -251,7 +251,7 @@ extern int __put_user_8(void *, unsigned long long);
 	({								\
 		unsigned long __limit = current_thread_info()->addr_limit - 1; \
 		const typeof(*(p)) __user *__tmp_p = (p);		\
-		register const typeof(*(p)) __r2 asm("r2") = (x);	\
+		register typeof(*(p)) __r2 asm("r2") = (x);	\
 		register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \
 		register unsigned long __l asm("r1") = __limit;		\
 		register int __e asm("r0");				\

From ac15b2b23808ee2c3264329b035a8f0f7d7f50e6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 28 Jul 2018 07:45:05 +0200
Subject: [PATCH 132/519] Linux 4.4.145

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bee96eda883a..be31491a2d67 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 144
+SUBLEVEL = 145
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 97e066123dc69bd3bca36794a0f75989e97a04fe Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 12 Jul 2018 09:33:04 -0700
Subject: [PATCH 133/519] MIPS: Fix off-by-one in pci_resource_to_user()

commit 38c0a74fe06da3be133cae3fb7bde6a9438e698b upstream.

The MIPS implementation of pci_resource_to_user() introduced in v3.12 by
commit 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci
memory space properly") incorrectly sets *end to the address of the
byte after the resource, rather than the last byte of the resource.

This results in userland seeing resources as a byte larger than they
actually are, for example a 32 byte BAR will be reported by a tool such
as lspci as being 33 bytes in size:

    Region 2: I/O ports at 1000 [disabled] [size=33]

Correct this by subtracting one from the calculated end address,
reporting the correct address to userland.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Rui Wang <rui.wang@windriver.com>
Fixes: 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly")
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v3.12+
Patchwork: https://patchwork.linux-mips.org/patch/19829/
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 98c31e5d9579..a7bc901819c8 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -89,7 +89,7 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
 	phys_addr_t size = resource_size(rsrc);
 
 	*start = fixup_bigphys_addr(rsrc->start, size);
-	*end = rsrc->start + size;
+	*end = rsrc->start + size - 1;
 }
 
 /*

From 1e2e0c65b5fcb13328fa4387663148040ae28d5a Mon Sep 17 00:00:00 2001
From: Donald Shanty III <dshanty@protonmail.com>
Date: Wed, 4 Jul 2018 15:50:47 +0000
Subject: [PATCH 134/519] Input: elan_i2c - add ACPI ID for lenovo ideapad 330

commit 938f45008d8bc391593c97508bc798cc95a52b9b upstream.

This allows Elan driver to bind to the touchpad found in Lenovo Ideapad 330
series laptops.

Signed-off-by: Donald Shanty III <dshanty@protonmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 97f6e05cffce..a7515f5dd76c 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1251,6 +1251,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0611", 0 },
 	{ "ELAN0612", 0 },
 	{ "ELAN0618", 0 },
+	{ "ELAN061D", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };

From 8fe1c0b7af711ff6b2e5ed17edb3128b5b66d924 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 18 Jul 2018 17:24:35 +0000
Subject: [PATCH 135/519] Input: i8042 - add Lenovo LaVie Z to the i8042 reset
 list

commit 384cf4285b34e08917e3e66603382f2b0c4f6e1b upstream.

The Lenovo LaVie Z laptop requires i8042 to be reset in order to
consistently detect its Elantech touchpad. The nomux and kbdreset
quirks are not sufficient.

It's possible the other LaVie Z models from NEC require this as well.

Cc: stable@vger.kernel.org
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index e484ea2dc787..34be09651ee8 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -527,6 +527,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
 		},
 	},
+	{
+		/* Lenovo LaVie Z */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"),
+		},
+	},
 	{ }
 };
 

From 0179b1b364243aca8001ef1fbd51a1d4349547c5 Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Mon, 16 Jul 2018 12:10:03 +0000
Subject: [PATCH 136/519] Input: elan_i2c - add another ACPI ID for Lenovo
 Ideapad 330-15AST

commit 6f88a6439da5d94de334a341503bc2c7f4a7ea7f upstream.

Add ELAN0622 to ACPI mapping table to support Elan touchpad found in
Ideapad 330-15AST.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Reported-by: Anant Shende <anantshende@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index a7515f5dd76c..a716482774db 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1252,6 +1252,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0612", 0 },
 	{ "ELAN0618", 0 },
 	{ "ELAN061D", 0 },
+	{ "ELAN0622", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };

From b80091b0113c931894c87b034048f8677a53fe9f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 24 Jul 2018 19:13:31 -0400
Subject: [PATCH 137/519] tracing: Fix double free of event_trigger_data

commit 1863c387259b629e4ebfb255495f67cd06aa229b upstream.

Running the following:

 # cd /sys/kernel/debug/tracing
 # echo 500000 > buffer_size_kb
[ Or some other number that takes up most of memory ]
 # echo snapshot > events/sched/sched_switch/trigger

Triggers the following bug:

 ------------[ cut here ]------------
 kernel BUG at mm/slub.c:296!
 invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
 CPU: 6 PID: 6878 Comm: bash Not tainted 4.18.0-rc6-test+ #1066
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
 RIP: 0010:kfree+0x16c/0x180
 Code: 05 41 0f b6 72 51 5b 5d 41 5c 4c 89 d7 e9 ac b3 f8 ff 48 89 d9 48 89 da 41 b8 01 00 00 00 5b 5d 41 5c 4c 89 d6 e9 f4 f3 ff ff <0f> 0b 0f 0b 48 8b 3d d9 d8 f9 00 e9 c1 fe ff ff 0f 1f 40 00 0f 1f
 RSP: 0018:ffffb654436d3d88 EFLAGS: 00010246
 RAX: ffff91a9d50f3d80 RBX: ffff91a9d50f3d80 RCX: ffff91a9d50f3d80
 RDX: 00000000000006a4 RSI: ffff91a9de5a60e0 RDI: ffff91a9d9803500
 RBP: ffffffff8d267c80 R08: 00000000000260e0 R09: ffffffff8c1a56be
 R10: fffff0d404543cc0 R11: 0000000000000389 R12: ffffffff8c1a56be
 R13: ffff91a9d9930e18 R14: ffff91a98c0c2890 R15: ffffffff8d267d00
 FS:  00007f363ea64700(0000) GS:ffff91a9de580000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000055c1cacc8e10 CR3: 00000000d9b46003 CR4: 00000000001606e0
 Call Trace:
  event_trigger_callback+0xee/0x1d0
  event_trigger_write+0xfc/0x1a0
  __vfs_write+0x33/0x190
  ? handle_mm_fault+0x115/0x230
  ? _cond_resched+0x16/0x40
  vfs_write+0xb0/0x190
  ksys_write+0x52/0xc0
  do_syscall_64+0x5a/0x160
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
 RIP: 0033:0x7f363e16ab50
 Code: 73 01 c3 48 8b 0d 38 83 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 79 db 2c 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 1e e3 01 00 48 89 04 24
 RSP: 002b:00007fff9a4c6378 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f363e16ab50
 RDX: 0000000000000009 RSI: 000055c1cacc8e10 RDI: 0000000000000001
 RBP: 000055c1cacc8e10 R08: 00007f363e435740 R09: 00007f363ea64700
 R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000009
 R13: 0000000000000001 R14: 00007f363e4345e0 R15: 00007f363e4303c0
 Modules linked in: ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device i915 snd_pcm snd_timer i2c_i801 snd soundcore i2c_algo_bit drm_kms_helper
86_pkg_temp_thermal video kvm_intel kvm irqbypass wmi e1000e
 ---[ end trace d301afa879ddfa25 ]---

The cause is because the register_snapshot_trigger() call failed to
allocate the snapshot buffer, and then called unregister_trigger()
which freed the data that was passed to it. Then on return to the
function that called register_snapshot_trigger(), as it sees it
failed to register, it frees the trigger_data again and causes
a double free.

By calling event_trigger_init() on the trigger_data (which only ups
the reference counter for it), and then event_trigger_free() afterward,
the trigger_data would not get freed by the registering trigger function
as it would only up and lower the ref count for it. If the register
trigger function fails, then the event_trigger_free() called after it
will free the trigger data normally.

Link: http://lkml.kernel.org/r/20180724191331.738eb819@gandalf.local.home

Cc: stable@vger.kerne.org
Fixes: 93e31ffbf417 ("tracing: Add 'snapshot' event trigger command")
Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events_trigger.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 2172dd61577e..9d9cacf07ed4 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -663,6 +663,8 @@ event_trigger_callback(struct event_command *cmd_ops,
 		goto out_free;
 
  out_reg:
+	/* Up the trigger_data count to make sure reg doesn't free it on failure */
+	event_trigger_init(trigger_ops, trigger_data);
 	ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
 	/*
 	 * The above returns on success the # of functions enabled,
@@ -670,11 +672,13 @@ event_trigger_callback(struct event_command *cmd_ops,
 	 * Consider no functions a failure too.
 	 */
 	if (!ret) {
+		cmd_ops->unreg(glob, trigger_ops, trigger_data, file);
 		ret = -ENOENT;
-		goto out_free;
-	} else if (ret < 0)
-		goto out_free;
-	ret = 0;
+	} else if (ret > 0)
+		ret = 0;
+
+	/* Down the counter of trigger_data or free it if not used anymore */
+	event_trigger_free(trigger_ops, trigger_data);
  out:
 	return ret;
 

From 1a7f63b2a0911df09b4939995e3fe5daeb9753ef Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 25 Jul 2018 16:02:06 -0400
Subject: [PATCH 138/519] tracing: Fix possible double free in
 event_enable_trigger_func()

commit 15cc78644d0075e76d59476a4467e7143860f660 upstream.

There was a case that triggered a double free in event_trigger_callback()
due to the called reg() function freeing the trigger_data and then it
getting freed again by the error return by the caller. The solution there
was to up the trigger_data ref count.

Code inspection found that event_enable_trigger_func() has the same issue,
but is not as easy to trigger (requires harder to trigger failures). It
needs to be solved slightly different as it needs more to clean up when the
reg() function fails.

Link: http://lkml.kernel.org/r/20180725124008.7008e586@gandalf.local.home

Cc: stable@vger.kernel.org
Fixes: 7862ad1846e99 ("tracing: Add 'enable_event' and 'disable_event' event trigger commands")
Reivewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events_trigger.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 9d9cacf07ed4..b8a894adab2c 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1231,6 +1231,9 @@ event_enable_trigger_func(struct event_command *cmd_ops,
 		goto out;
 	}
 
+	/* Up the trigger_data count to make sure nothing frees it on failure */
+	event_trigger_init(trigger_ops, trigger_data);
+
 	if (trigger) {
 		number = strsep(&trigger, ":");
 
@@ -1281,6 +1284,7 @@ event_enable_trigger_func(struct event_command *cmd_ops,
 		goto out_disable;
 	/* Just return zero, not the number of enabled functions */
 	ret = 0;
+	event_trigger_free(trigger_ops, trigger_data);
  out:
 	return ret;
 
@@ -1291,7 +1295,7 @@ event_enable_trigger_func(struct event_command *cmd_ops,
  out_free:
 	if (cmd_ops->set_filter)
 		cmd_ops->set_filter(NULL, trigger_data, NULL);
-	kfree(trigger_data);
+	event_trigger_free(trigger_ops, trigger_data);
 	kfree(enable_data);
 	goto out;
 }

From d9bbd8076836b1c3e648132afe73185f945b89a7 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Wed, 25 Jul 2018 16:20:38 +0200
Subject: [PATCH 139/519] tracing/kprobes: Fix trace_probe flags on
 enable_trace_kprobe() failure

commit 57ea2a34adf40f3a6e88409aafcf803b8945619a upstream.

If enable_trace_kprobe fails to enable the probe in enable_k(ret)probe
it returns an error, but does not unset the tp flags it set previously.
This results in a probe being considered enabled and failures like being
unable to remove the probe through kprobe_events file since probes_open()
expects every probe to be disabled.

Link: http://lkml.kernel.org/r/20180725102826.8300-1-asavkov@redhat.com
Link: http://lkml.kernel.org/r/20180725142038.4765-1-asavkov@redhat.com

Cc: Ingo Molnar <mingo@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 41a7dd420c57 ("tracing/kprobes: Support ftrace_event_file base multibuffer")
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_kprobe.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f2682799c215..a357d7e93934 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -349,11 +349,10 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
 static int
 enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 {
+	struct event_file_link *link;
 	int ret = 0;
 
 	if (file) {
-		struct event_file_link *link;
-
 		link = kmalloc(sizeof(*link), GFP_KERNEL);
 		if (!link) {
 			ret = -ENOMEM;
@@ -373,6 +372,16 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 		else
 			ret = enable_kprobe(&tk->rp.kp);
 	}
+
+	if (ret) {
+		if (file) {
+			list_del_rcu(&link->list);
+			kfree(link);
+			tk->tp.flags &= ~TP_FLAG_TRACE;
+		} else {
+			tk->tp.flags &= ~TP_FLAG_PROFILE;
+		}
+	}
  out:
 	return ret;
 }

From dab04fda26530ee686b14b02aa2c5e716cef153c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 25 Jul 2018 22:28:56 -0400
Subject: [PATCH 140/519] tracing: Quiet gcc warning about maybe unused link
 variable

commit 2519c1bbe38d7acacc9aacba303ca6f97482ed53 upstream.

Commit 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on
enable_trace_kprobe() failure") added an if statement that depends on another
if statement that gcc doesn't see will initialize the "link" variable and
gives the warning:

 "warning: 'link' may be used uninitialized in this function"

It is really a false positive, but to quiet the warning, and also to make
sure that it never actually is used uninitialized, initialize the "link"
variable to NULL and add an if (!WARN_ON_ONCE(!link)) where the compiler
thinks it could be used uninitialized.

Cc: stable@vger.kernel.org
Fixes: 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_kprobe.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a357d7e93934..f0ee722be520 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -349,7 +349,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
 static int
 enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 {
-	struct event_file_link *link;
+	struct event_file_link *link = NULL;
 	int ret = 0;
 
 	if (file) {
@@ -375,7 +375,9 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 
 	if (ret) {
 		if (file) {
-			list_del_rcu(&link->list);
+			/* Notice the if is true on not WARN() */
+			if (!WARN_ON_ONCE(!link))
+				list_del_rcu(&link->list);
 			kfree(link);
 			tk->tp.flags &= ~TP_FLAG_TRACE;
 		} else {

From 2670ecf4711430d9ee263cad666b6ea2034952d5 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 12 Jun 2018 08:57:53 +0200
Subject: [PATCH 141/519] xen/netfront: raise max number of slots in
 xennet_get_responses()

[ Upstream commit 57f230ab04d2910a06d17d988f1c4d7586a59113 ]

The max number of slots used in xennet_get_responses() is set to
MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD).

In old kernel-xen MAX_SKB_FRAGS was 18, while nowadays it is 17. This
difference is resulting in frequent messages "too many slots" and a
reduced network throughput for some workloads (factor 10 below that of
a kernel-xen based guest).

Replacing MAX_SKB_FRAGS by XEN_NETIF_NR_SLOTS_MIN for calculation of
the max number of slots to use solves that problem (tests showed no
more messages "too many slots" and throughput was as high as with the
kernel-xen based guest system).

Replace MAX_SKB_FRAGS-2 by XEN_NETIF_NR_SLOTS_MIN-1 in
netfront_tx_slot_available() for making it clearer what is really being
tested without actually modifying the tested value.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a0de2453fa09..3bb3d6d9117c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -238,7 +238,7 @@ static void rx_refill_timeout(unsigned long data)
 static int netfront_tx_slot_available(struct netfront_queue *queue)
 {
 	return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
-		(NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
+		(NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1);
 }
 
 static void xennet_maybe_wake_tx(struct netfront_queue *queue)
@@ -775,7 +775,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
 	RING_IDX cons = queue->rx.rsp_cons;
 	struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 	grant_ref_t ref = xennet_get_rx_ref(queue, cons);
-	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+	int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
 	int slots = 1;
 	int err = 0;
 	unsigned long ret;

From 55e526aa96db8a70b69524cbdbb8168a20e4ea38 Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Mon, 11 Jun 2018 16:18:40 +0800
Subject: [PATCH 142/519] ALSA: emu10k1: add error handling for snd_ctl_add

[ Upstream commit 6d531e7b972cb62ded011c2dfcc2d9f72ea6c421 ]

When snd_ctl_add fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling snd_ctl_add.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/emu10k1/emupcm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 14a305bd8a98..72e442d86bb1 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -1850,7 +1850,9 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device)
 	if (!kctl)
 		return -ENOMEM;
 	kctl->id.device = device;
-	snd_ctl_add(emu->card, kctl);
+	err = snd_ctl_add(emu->card, kctl);
+	if (err < 0)
+		return err;
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(emu->pci), 64*1024, 64*1024);
 

From 8edf67eda05a8aa9d1e1f0d8c3c9f98cd1cb93f7 Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Mon, 11 Jun 2018 16:04:06 +0800
Subject: [PATCH 143/519] ALSA: fm801: add error handling for snd_ctl_add

[ Upstream commit ef1ffbe7889e99f5b5cccb41c89e5c94f50f3218 ]

When snd_ctl_add fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling snd_ctl_add.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/fm801.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index 1fdd92b6f18f..d6e89a6d0bb9 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -1050,11 +1050,19 @@ static int snd_fm801_mixer(struct fm801 *chip)
 		if ((err = snd_ac97_mixer(chip->ac97_bus, &ac97, &chip->ac97_sec)) < 0)
 			return err;
 	}
-	for (i = 0; i < FM801_CONTROLS; i++)
-		snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls[i], chip));
+	for (i = 0; i < FM801_CONTROLS; i++) {
+		err = snd_ctl_add(chip->card,
+			snd_ctl_new1(&snd_fm801_controls[i], chip));
+		if (err < 0)
+			return err;
+	}
 	if (chip->multichannel) {
-		for (i = 0; i < FM801_CONTROLS_MULTI; i++)
-			snd_ctl_add(chip->card, snd_ctl_new1(&snd_fm801_controls_multi[i], chip));
+		for (i = 0; i < FM801_CONTROLS_MULTI; i++) {
+			err = snd_ctl_add(chip->card,
+				snd_ctl_new1(&snd_fm801_controls_multi[i], chip));
+			if (err < 0)
+				return err;
+		}
 	}
 	return 0;
 }

From e6960af9566e9aeee3db07ac2d6a4f70f5731948 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Fri, 8 Jun 2018 16:31:46 -0400
Subject: [PATCH 144/519] nfsd: fix potential use-after-free in
 nfsd4_decode_getdeviceinfo

[ Upstream commit 3171822fdcdd6e6d536047c425af6dc7a92dc585 ]

When running a fuzz tester against a KASAN-enabled kernel, the following
splat periodically occurs.

The problem occurs when the test sends a GETDEVICEINFO request with a
malformed xdr array (size but no data) for gdia_notify_types and the
array size is > 0x3fffffff, which results in an overflow in the value of
nbytes which is passed to read_buf().

If the array size is 0x40000000, 0x80000000, or 0xc0000000, then after
the overflow occurs, the value of nbytes 0, and when that happens the
pointer returned by read_buf() points to the end of the xdr data (i.e.
argp->end) when really it should be returning NULL.

Fix this by returning NFS4ERR_BAD_XDR if the array size is > 1000 (this
value is arbitrary, but it's the same threshold used by
nfsd4_decode_bitmap()... in could really be any value >= 1 since it's
expected to get at most a single bitmap in gdia_notify_types).

[  119.256854] ==================================================================
[  119.257611] BUG: KASAN: use-after-free in nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd]
[  119.258422] Read of size 4 at addr ffff880113ada000 by task nfsd/538

[  119.259146] CPU: 0 PID: 538 Comm: nfsd Not tainted 4.17.0+ #1
[  119.259662] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014
[  119.261202] Call Trace:
[  119.262265]  dump_stack+0x71/0xab
[  119.263371]  print_address_description+0x6a/0x270
[  119.264609]  kasan_report+0x258/0x380
[  119.265854]  ? nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd]
[  119.267291]  nfsd4_decode_getdeviceinfo+0x5a4/0x5b0 [nfsd]
[  119.268549]  ? nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd]
[  119.269873]  ? nfsd4_decode_sequence+0x490/0x490 [nfsd]
[  119.271095]  nfs4svc_decode_compoundargs+0xa5b/0x13c0 [nfsd]
[  119.272393]  ? nfsd4_release_compoundargs+0x1b0/0x1b0 [nfsd]
[  119.273658]  nfsd_dispatch+0x183/0x850 [nfsd]
[  119.274918]  svc_process+0x161c/0x31a0 [sunrpc]
[  119.276172]  ? svc_printk+0x190/0x190 [sunrpc]
[  119.277386]  ? svc_xprt_release+0x451/0x680 [sunrpc]
[  119.278622]  nfsd+0x2b9/0x430 [nfsd]
[  119.279771]  ? nfsd_destroy+0x1c0/0x1c0 [nfsd]
[  119.281157]  kthread+0x2db/0x390
[  119.282347]  ? kthread_create_worker_on_cpu+0xc0/0xc0
[  119.283756]  ret_from_fork+0x35/0x40

[  119.286041] Allocated by task 436:
[  119.287525]  kasan_kmalloc+0xa0/0xd0
[  119.288685]  kmem_cache_alloc+0xe9/0x1f0
[  119.289900]  get_empty_filp+0x7b/0x410
[  119.291037]  path_openat+0xca/0x4220
[  119.292242]  do_filp_open+0x182/0x280
[  119.293411]  do_sys_open+0x216/0x360
[  119.294555]  do_syscall_64+0xa0/0x2f0
[  119.295721]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[  119.298068] Freed by task 436:
[  119.299271]  __kasan_slab_free+0x130/0x180
[  119.300557]  kmem_cache_free+0x78/0x210
[  119.301823]  rcu_process_callbacks+0x35b/0xbd0
[  119.303162]  __do_softirq+0x192/0x5ea

[  119.305443] The buggy address belongs to the object at ffff880113ada000
                which belongs to the cache filp of size 256
[  119.308556] The buggy address is located 0 bytes inside of
                256-byte region [ffff880113ada000, ffff880113ada100)
[  119.311376] The buggy address belongs to the page:
[  119.312728] page:ffffea00044eb680 count:1 mapcount:0 mapping:0000000000000000 index:0xffff880113ada780
[  119.314428] flags: 0x17ffe000000100(slab)
[  119.315740] raw: 0017ffe000000100 0000000000000000 ffff880113ada780 00000001000c0001
[  119.317379] raw: ffffea0004553c60 ffffea00045c11e0 ffff88011b167e00 0000000000000000
[  119.319050] page dumped because: kasan: bad access detected

[  119.321652] Memory state around the buggy address:
[  119.322993]  ffff880113ad9f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  119.324515]  ffff880113ad9f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  119.326087] >ffff880113ada000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  119.327547]                    ^
[  119.328730]  ffff880113ada080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  119.330218]  ffff880113ada100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[  119.331740] ==================================================================

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4xdr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 57e3262ec57a..ee0da259a3d3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1538,6 +1538,8 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
 	gdev->gd_maxcount = be32_to_cpup(p++);
 	num = be32_to_cpup(p++);
 	if (num) {
+		if (num > 1000)
+			goto xdr_error;
 		READ_BUF(4 * num);
 		gdev->gd_notify_types = be32_to_cpup(p++);
 		for (i = 1; i < num; i++) {

From b752dfd91a3e9712aa68468b63725c927adcfaa4 Mon Sep 17 00:00:00 2001
From: Chintan Pandya <cpandya@codeaurora.org>
Date: Thu, 7 Jun 2018 17:06:50 -0700
Subject: [PATCH 145/519] mm: vmalloc: avoid racy handling of debugobjects in
 vunmap

[ Upstream commit f3c01d2f3ade6790db67f80fef60df84424f8964 ]

Currently, __vunmap flow is,
 1) Release the VM area
 2) Free the debug objects corresponding to that vm area.

This leave some race window open.
 1) Release the VM area
 1.5) Some other client gets the same vm area
 1.6) This client allocates new debug objects on the same
      vm area
 2) Free the debug objects corresponding to this vm area.

Here, we actually free 'other' client's debug objects.

Fix this by freeing the debug objects first and then releasing the VM
area.

Link: http://lkml.kernel.org/r/1523961828-9485-2-git-send-email-cpandya@codeaurora.org
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmalloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8e3c9c5a3042..de8e372ece04 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1460,7 +1460,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 			addr))
 		return;
 
-	area = remove_vm_area(addr);
+	area = find_vmap_area((unsigned long)addr)->vm;
 	if (unlikely(!area)) {
 		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
 				addr);
@@ -1470,6 +1470,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	debug_check_no_locks_freed(addr, get_vm_area_size(area));
 	debug_check_no_obj_freed(addr, get_vm_area_size(area));
 
+	remove_vm_area(addr);
 	if (deallocate_pages) {
 		int i;
 

From 3231613224c076ed1f0984e382cbd2df0d64c260 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 7 Jun 2018 17:05:17 -0700
Subject: [PATCH 146/519] mm/slub.c: add __printf verification to slab_err()

[ Upstream commit a38965bf941b7c2af50de09c96bc5f03e136caef ]

__printf is useful to verify format and arguments.  Remove the following
warning (with W=1):

  mm/slub.c:721:2: warning: function might be possible candidate for `gnu_printf' format attribute [-Wsuggest-attribute=format]

Link: http://lkml.kernel.org/r/20180505200706.19986-1-malat@debian.org
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 4cf3a9c768b1..2284c4333857 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -659,7 +659,7 @@ void object_err(struct kmem_cache *s, struct page *page,
 	print_trailer(s, page, object);
 }
 
-static void slab_err(struct kmem_cache *s, struct page *page,
+static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 			const char *fmt, ...)
 {
 	va_list args;

From af1ecd4b5f740c30f7ed72bb8af0d7f74cc1b88d Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Tue, 5 Jun 2018 23:09:14 +0200
Subject: [PATCH 147/519] rtc: ensure rtc_set_alarm fails when alarms are not
 supported

[ Upstream commit abfdff44bc38e9e2ef7929f633fb8462632299d4 ]

When using RTC_ALM_SET or RTC_WKALM_SET with rtc_wkalrm.enabled not set,
rtc_timer_enqueue() is not called and rtc_set_alarm() may succeed but the
subsequent RTC_AIE_ON ioctl will fail. RTC_ALM_READ would also fail in that
case.

Ensure rtc_set_alarm() fails when alarms are not supported to avoid letting
programs think the alarms are working for a particular RTC when they are
not.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/interface.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index c2cf9485fe32..8c10f3db6336 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -349,6 +349,11 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
 	int err;
 
+	if (!rtc->ops)
+		return -ENODEV;
+	else if (!rtc->ops->set_alarm)
+		return -EINVAL;
+
 	err = rtc_valid_tm(&alarm->time);
 	if (err != 0)
 		return err;

From d77ff3576f734e5cc457679b8a5e21f413dbb68b Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 31 May 2018 18:45:21 +0200
Subject: [PATCH 148/519] netfilter: ipset: List timing out entries with
 "timeout 1" instead of zero

[ Upstream commit bd975e691486ba52790ba23cc9b4fecab7bc0d31 ]

When listing sets with timeout support, there's a probability that
just timing out entries with "0" timeout value is listed/saved.
However when restoring the saved list, the zero timeout value means
permanent elelements.

The new behaviour is that timing out entries are listed with "timeout 1"
instead of zero.

Fixes netfilter bugzilla #1258.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 1d6a935c1ac5..8793f5a7b820 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -65,8 +65,14 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
 static inline u32
 ip_set_timeout_get(unsigned long *timeout)
 {
-	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
-		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
+	u32 t;
+
+	if (*timeout == IPSET_ELEM_PERMANENT)
+		return 0;
+
+	t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
+	/* Zero value in userspace means no timeout */
+	return t == 0 ? 1 : t;
 }
 
 #endif	/* __KERNEL__ */

From 52175c849bd4c01dd14038b4401d5044d99a6b0a Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 1 Jun 2018 11:31:44 -0700
Subject: [PATCH 149/519] infiniband: fix a possible use-after-free bug

[ Upstream commit cb2595c1393b4a5211534e6f0a0fbad369e21ad8 ]

ucma_process_join() will free the new allocated "mc" struct,
if there is any error after that, especially the copy_to_user().

But in parallel, ucma_leave_multicast() could find this "mc"
through idr_find() before ucma_process_join() frees it, since it
is already published.

So "mc" could be used in ucma_leave_multicast() after it is been
allocated and freed in ucma_process_join(), since we don't refcnt
it.

Fix this by separating "publish" from ID allocation, so that we
can get an ID first and publish it later after copy_to_user().

Fixes: c8f6a362bf3e ("RDMA/cma: Add multicast communication support")
Reported-by: Noam Rathaus <noamr@beyondsecurity.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/ucma.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 795938edce3f..55aa8d3d752f 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -217,7 +217,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
 		return NULL;
 
 	mutex_lock(&mut);
-	mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
+	mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
 	mutex_unlock(&mut);
 	if (mc->id < 0)
 		goto error;
@@ -1375,6 +1375,10 @@ static ssize_t ucma_process_join(struct ucma_file *file,
 		goto err3;
 	}
 
+	mutex_lock(&mut);
+	idr_replace(&multicast_idr, mc, mc->id);
+	mutex_unlock(&mut);
+
 	mutex_unlock(&file->mut);
 	ucma_put_ctx(ctx);
 	return 0;

From c48557366d5244b7736b218633c5ef7a55c18f53 Mon Sep 17 00:00:00 2001
From: Stewart Smith <stewart@linux.ibm.com>
Date: Thu, 29 Mar 2018 17:02:46 +1100
Subject: [PATCH 150/519] hvc_opal: don't set tb_ticks_per_usec in
 udbg_init_opal_common()

[ Upstream commit 447808bf500a7cc92173266a59f8a494e132b122 ]

time_init() will set up tb_ticks_per_usec based on reality.
time_init() is called *after* udbg_init_opal_common() during boot.

from arch/powerpc/kernel/time.c:
  unsigned long tb_ticks_per_usec = 100; /* sane default */

Currently, all powernv systems have a timebase frequency of 512mhz
(512000000/1000000 == 0x200) - although there's nothing written
down anywhere that I can find saying that we couldn't make that
different based on the requirements in the ISA.

So, we've been (accidentally) thwacking the (currently) correct
(for powernv at least) value for tb_ticks_per_usec earlier than
we otherwise would have.

The "sane default" seems to be adequate for our purposes between
udbg_init_opal_common() and time_init() being called, and if it isn't,
then we should probably be setting it somewhere that isn't hvc_opal.c!

Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/hvc/hvc_opal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index 47b54c6aefd2..9f660e55d1ba 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -323,7 +323,6 @@ static void udbg_init_opal_common(void)
 	udbg_putc = udbg_opal_putc;
 	udbg_getc = udbg_opal_getc;
 	udbg_getc_poll = udbg_opal_getc_poll;
-	tb_ticks_per_usec = 0x200; /* Make udelay not suck */
 }
 
 void __init hvc_opal_init_early(void)

From b5145f8b3506ad25b4c31970057fabca1543b325 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 20:31:22 +1000
Subject: [PATCH 151/519] powerpc/64s: Fix compiler store ordering to SLB
 shadow area

[ Upstream commit 926bc2f100c24d4842b3064b5af44ae964c1d81c ]

The stores to update the SLB shadow area must be made as they appear
in the C code, so that the hypervisor does not see an entry with
mismatched vsid and esid. Use WRITE_ONCE for this.

GCC has been observed to elide the first store to esid in the update,
which means that if the hypervisor interrupts the guest after storing
to vsid, it could see an entry with old esid and new vsid, which may
possibly result in memory corruption.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/mm/slb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 515730e499fe..309027208f7c 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -69,14 +69,14 @@ static inline void slb_shadow_update(unsigned long ea, int ssize,
 	 * updating it.  No write barriers are needed here, provided
 	 * we only update the current CPU's SLB shadow buffer.
 	 */
-	p->save_area[index].esid = 0;
-	p->save_area[index].vsid = cpu_to_be64(mk_vsid_data(ea, ssize, flags));
-	p->save_area[index].esid = cpu_to_be64(mk_esid_data(ea, ssize, index));
+	WRITE_ONCE(p->save_area[index].esid, 0);
+	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
+	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
 }
 
 static inline void slb_shadow_clear(enum slb_index index)
 {
-	get_slb_shadow()->save_area[index].esid = 0;
+	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, 0);
 }
 
 static inline void create_shadowed_slbe(unsigned long ea, int ssize,

From ded30c95e8588ccdd9f281f36c0d37d7d030ef9d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 29 May 2018 14:56:19 +0300
Subject: [PATCH 152/519] RDMA/mad: Convert BUG_ONs to error flows

[ Upstream commit 2468b82d69e3a53d024f28d79ba0fdb8bf43dfbf ]

Let's perform checks in-place instead of BUG_ONs.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/mad.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 8d84c563ba75..616173b7a5e8 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1548,7 +1548,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			    mad_reg_req->oui, 3)) {
 			method = &(*vendor_table)->vendor_class[
 						vclass]->method_table[i];
-			BUG_ON(!*method);
+			if (!*method)
+				goto error3;
 			goto check_in_use;
 		}
 	}
@@ -1558,10 +1559,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 				vclass]->oui[i])) {
 			method = &(*vendor_table)->vendor_class[
 				vclass]->method_table[i];
-			BUG_ON(*method);
 			/* Allocate method table for this OUI */
-			if ((ret = allocate_method_table(method)))
-				goto error3;
+			if (!*method) {
+				ret = allocate_method_table(method);
+				if (ret)
+					goto error3;
+			}
 			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
 			       mad_reg_req->oui, 3);
 			goto check_in_use;

From 7a5468e02be689557fe4fead5d4897adc5c6932f Mon Sep 17 00:00:00 2001
From: Anatoly Pugachev <matorola@gmail.com>
Date: Mon, 28 May 2018 02:06:37 +0300
Subject: [PATCH 153/519] disable loading f2fs module on PAGE_SIZE > 4KB

[ Upstream commit 4071e67cffcc5c2a007116a02437471351f550eb ]

The following patch disables loading of f2fs module on architectures
which have PAGE_SIZE > 4096 , since it is impossible to mount f2fs on
such architectures , log messages are:

mount: /mnt: wrong fs type, bad option, bad superblock on
/dev/vdiskb1, missing codepage or helper program, or other error.
/dev/vdiskb1: F2FS filesystem,
UUID=1d8b9ca4-2389-4910-af3b-10998969f09c, volume name ""

May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS
filesystem in 1th superblock
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS
filesystem in 2th superblock
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB

which was introduced by git commit 5c9b469295fb6b10d98923eab5e79c4edb80ed20

tested on git kernel 4.17.0-rc6-00309-gec30dcf7f425

with patch applied:

modprobe: ERROR: could not insert 'f2fs': Invalid argument
May 28 01:40:28 v215 kernel: F2FS not supported on PAGE_SIZE(8192) != 4096

Signed-off-by: Anatoly Pugachev <matorola@gmail.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/super.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4f666368aa85..6cc67e1bbb41 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,12 @@ static int __init init_f2fs_fs(void)
 {
 	int err;
 
+	if (PAGE_SIZE != F2FS_BLKSIZE) {
+		printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n",
+				PAGE_SIZE, F2FS_BLKSIZE);
+		return -EINVAL;
+	}
+
 	f2fs_build_trace_ios();
 
 	err = init_inodecache();

From 52941707f789de36db5a12cd4e585a77140d546f Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Sat, 26 May 2018 18:03:34 +0800
Subject: [PATCH 154/519] f2fs: fix to don't trigger writeback during recovery

[ Upstream commit 64c74a7ab505ea40d1b3e5d02735ecab08ae1b14 ]

- f2fs_fill_super
 - recover_fsync_data
  - recover_data
   - del_fsync_inode
    - iput
     - iput_final
      - write_inode_now
       - f2fs_write_inode
        - f2fs_balance_fs
         - f2fs_balance_fs_bg
          - sync_dirty_inodes

With data_flush mount option, during recovery, in order to avoid entering
above writeback flow, let's detect recovery status and do skip in
f2fs_balance_fs_bg.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/segment.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f77b3258454a..2bba0c4ef4b7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -295,6 +295,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
 
 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 {
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		return;
+
 	/* try to shrink extent cache when there is no enough memory */
 	if (!available_free_memory(sbi, EXTENT_CACHE))
 		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

From 3b14ad7ca75ec8600a73538cc6240be008683585 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 29 May 2018 16:13:03 -0600
Subject: [PATCH 155/519] usbip: usbip_detach: Fix memory, udev context and
 udev leak

[ Upstream commit d179f99a651685b19333360e6558110da2fe9bd7 ]

detach_port() fails to call usbip_vhci_driver_close() from its error
path after usbip_vhci_detach_device() returns failure, leaking memory
allocated in usbip_vhci_driver_open() and holding udev_context and udev
references. Fix it to call usbip_vhci_driver_close().

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/src/usbip_detach.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
index 9db9d21bb2ec..6a8db858caa5 100644
--- a/tools/usb/usbip/src/usbip_detach.c
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -43,7 +43,7 @@ void usbip_detach_usage(void)
 
 static int detach_port(char *port)
 {
-	int ret;
+	int ret = 0;
 	uint8_t portnum;
 	char path[PATH_MAX+1];
 
@@ -73,9 +73,12 @@ static int detach_port(char *port)
 	}
 
 	ret = usbip_vhci_detach_device(portnum);
-	if (ret < 0)
-		return -1;
+	if (ret < 0) {
+		ret = -1;
+		goto call_driver_close;
+	}
 
+call_driver_close:
 	usbip_vhci_driver_close();
 
 	return ret;

From b4d9e5e88b8f0d71b00928bf8c5d952ce6fbdcc9 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 3 May 2018 11:25:08 -0700
Subject: [PATCH 156/519] perf/x86/intel/uncore: Correct fixed counter index
 check in generic code

[ Upstream commit 4749f8196452eeb73cf2086a6a9705bae479d33d ]

There is no index which is bigger than UNCORE_PMC_IDX_FIXED. The only
exception is client IMC uncore, which has been specially handled.
For generic code, it is not correct to use >= to check fixed counter.
The code quality issue will bring problem when a new counter index is
introduced.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: acme@kernel.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1525371913-10597-3-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 61215a69b03d..b22e9c4dd111 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -229,7 +229,7 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e
 	u64 prev_count, new_count, delta;
 	int shift;
 
-	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+	if (event->hw.idx == UNCORE_PMC_IDX_FIXED)
 		shift = 64 - uncore_fixed_ctr_bits(box);
 	else
 		shift = 64 - uncore_perf_ctr_bits(box);

From d6a260fe056bef936912f7f9f119a996a1bebe70 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 3 May 2018 11:25:07 -0700
Subject: [PATCH 157/519] perf/x86/intel/uncore: Correct fixed counter index
 check for NHM

[ Upstream commit d71f11c076c420c4e2fceb4faefa144e055e0935 ]

For Nehalem and Westmere, there is only one fixed counter for W-Box.
There is no index which is bigger than UNCORE_PMC_IDX_FIXED.
It is not correct to use >= to check fixed counter.
The code quality issue will bring problem when new counter index is
introduced.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: acme@kernel.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1525371913-10597-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
index 2749965afed0..83cadc2605a7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
@@ -240,7 +240,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+	if (hwc->idx == UNCORE_PMC_IDX_FIXED)
 		wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
 	else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
 		wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);

From 8477cd5e00234d26c58ffe5cf99a7dde57841315 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Thu, 22 Mar 2018 14:14:45 +0200
Subject: [PATCH 158/519] iwlwifi: pcie: fix race in Rx buffer allocator

[ Upstream commit 0f22e40053bd5378ad1e3250e65c574fd61c0cd6 ]

Make sure the rx_allocator worker is canceled before running the
rx_init routine.  rx_init frees and re-allocates all rxb's pages.  The
rx_allocator worker also allocates pages for the used rxb's.  Running
rx_init and rx_allocator simultaniously causes a kernel panic.  Fix
that by canceling the work in rx_init.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/pcie/rx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c
index e06591f625c4..d6f9858ff2de 100644
--- a/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/rx.c
@@ -713,6 +713,8 @@ int iwl_pcie_rx_init(struct iwl_trans *trans)
 						WQ_HIGHPRI | WQ_UNBOUND, 1);
 	INIT_WORK(&rba->rx_alloc, iwl_pcie_rx_allocator_work);
 
+	cancel_work_sync(&rba->rx_alloc);
+
 	spin_lock(&rba->lock);
 	atomic_set(&rba->req_pending, 0);
 	atomic_set(&rba->req_ready, 0);

From 6d143e2c4545d7e2a13b22a5fcfcc98b38b9457d Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@linaro.org>
Date: Tue, 29 May 2018 18:37:16 +0200
Subject: [PATCH 159/519] Bluetooth: hci_qca: Fix "Sleep inside atomic section"
 warning

[ Upstream commit 9960521c44a5d828f29636ceac0600603ecbddbf ]

This patch fixes the following warning during boot:

 do not call blocking ops when !TASK_RUNNING; state=1 set at
 [<(ptrval)>] qca_setup+0x194/0x750 [hci_uart]
 WARNING: CPU: 2 PID: 1878 at kernel/sched/core.c:6135
 __might_sleep+0x7c/0x88

In qca_set_baudrate(), the current task state is set to
TASK_UNINTERRUPTIBLE before going to sleep for 300ms. It was then
restored to TASK_INTERRUPTIBLE. This patch sets the current task state
back to TASK_RUNNING instead.

Signed-off-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/hci_qca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 476d39c7ba20..ecfb9ed2cff6 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -884,7 +884,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
 	 */
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	schedule_timeout(msecs_to_jiffies(BAUDRATE_SETTLE_TIMEOUT_MS));
-	set_current_state(TASK_INTERRUPTIBLE);
+	set_current_state(TASK_RUNNING);
 
 	return 0;
 }

From 276a8ad16b34837b3ad586beb7339057e48510a7 Mon Sep 17 00:00:00 2001
From: Jian-Hong Pan <jian-hong@endlessm.com>
Date: Mon, 21 May 2018 18:09:20 +0800
Subject: [PATCH 160/519] Bluetooth: btusb: Add a new Realtek 8723DE ID
 2ff8:b011

[ Upstream commit 66d9975c5a7c40aa7e4bb0ec0b0c37ba1f190923 ]

Without this patch we cannot turn on the Bluethooth adapter on ASUS
E406MA.

T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12   MxCh= 0
D:  Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=2ff8 ProdID=b011 Rev= 2.00
S:  Manufacturer=Realtek
S:  Product=802.11n WLAN Adapter
S:  SerialNumber=00e04c000001
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 91676535a1a3..4a899b41145e 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -339,6 +339,9 @@ static const struct usb_device_id blacklist_table[] = {
 	/* Additional Realtek 8723BU Bluetooth devices */
 	{ USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8723DE Bluetooth devices */
+	{ USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK },
+
 	/* Additional Realtek 8821AE Bluetooth devices */
 	{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },

From 8fe7c570ce064514b0b928089966e551bafc443b Mon Sep 17 00:00:00 2001
From: Kai Chieh Chuang <kaichieh.chuang@mediatek.com>
Date: Mon, 28 May 2018 10:18:18 +0800
Subject: [PATCH 161/519] ASoC: dpcm: fix BE dai not hw_free and shutdown

[ Upstream commit 9c0ac70ad24d76b873c1551e27790c7f6a815d5c ]

In case, one BE is used by two FE1/FE2
FE1--->BE-->
       |
FE2----]
when FE1/FE2 call dpcm_be_dai_hw_free() together
the BE users will be 2 (> 1), hence cannot be hw_free
the be state will leave at, ex. SND_SOC_DPCM_STATE_STOP

later FE1/FE2 call dpcm_be_dai_shutdown(),
will be skip due to wrong state.
leaving the BE not being hw_free and shutdown.

The BE dai will be hw_free later when calling
dpcm_be_dai_shutdown() if still in invalid state.

Signed-off-by: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-pcm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 977066ba1769..43b80db952d1 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1682,8 +1682,10 @@ int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
 			continue;
 
 		if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) &&
-		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN))
-			continue;
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) {
+			soc_pcm_hw_free(be_substream);
+			be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE;
+		}
 
 		dev_dbg(be->dev, "ASoC: close BE %s\n",
 			dpcm->fe->dai_link->name);

From d47b6f739d45661e046e06fb40f843c10f452463 Mon Sep 17 00:00:00 2001
From: Vincent Palatin <vpalatin@chromium.org>
Date: Wed, 18 Apr 2018 12:23:58 +0200
Subject: [PATCH 162/519] mfd: cros_ec: Fail early if we cannot identify the EC

[ Upstream commit 0dbbf25561b29ffab5ba6277429760abdf49ceff ]

If we cannot communicate with the EC chip to detect the protocol version
and its features, it's very likely useless to continue. Else we will
commit all kind of uninformed mistakes (using the wrong protocol, the
wrong buffer size, mixing the EC with other chips).

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
Acked-by: Benson Leung <bleung@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/cros_ec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 0eee63542038..115a6f67ab51 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -68,7 +68,11 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 
 	mutex_init(&ec_dev->lock);
 
-	cros_ec_query_all(ec_dev);
+	err = cros_ec_query_all(ec_dev);
+	if (err) {
+		dev_err(dev, "Cannot identify the EC: error %d\n", err);
+		return err;
+	}
 
 	err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, 1,
 			      NULL, ec_dev->irq, NULL);

From efd8946b75739deb614b5c889452bed98ba32fa0 Mon Sep 17 00:00:00 2001
From: Ganapathi Bhat <gbhat@marvell.com>
Date: Thu, 24 May 2018 19:18:27 +0530
Subject: [PATCH 163/519] mwifiex: handle race during mwifiex_usb_disconnect

[ Upstream commit b817047ae70c0bd67b677b65d0d69d72cd6e9728 ]

Race condition is observed during rmmod of mwifiex_usb:

1. The rmmod thread will call mwifiex_usb_disconnect(), download
   SHUTDOWN command and do wait_event_interruptible_timeout(),
   waiting for response.

2. The main thread will handle the response and will do a
   wake_up_interruptible(), unblocking rmmod thread.

3. On getting unblocked, rmmod thread  will make rx_cmd.urb = NULL in
   mwifiex_usb_free().

4. The main thread will try to resubmit rx_cmd.urb in
   mwifiex_usb_submit_rx_urb(), which is NULL.

To fix, wait for main thread to complete before calling
mwifiex_usb_free().

Signed-off-by: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c
index e43aff932360..1a1b1de87583 100644
--- a/drivers/net/wireless/mwifiex/usb.c
+++ b/drivers/net/wireless/mwifiex/usb.c
@@ -624,6 +624,9 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf)
 					 MWIFIEX_FUNC_SHUTDOWN);
 	}
 
+	if (adapter->workqueue)
+		flush_workqueue(adapter->workqueue);
+
 	mwifiex_usb_free(card);
 
 	mwifiex_dbg(adapter, FATAL,

From b5326119895704e7912ffad25bb3360aa75bb780 Mon Sep 17 00:00:00 2001
From: Eyal Reizer <eyalreizer@gmail.com>
Date: Mon, 28 May 2018 11:36:42 +0300
Subject: [PATCH 164/519] wlcore: sdio: check for valid platform device data
 before suspend

[ Upstream commit 6e91d48371e79862ea2c05867aaebe4afe55a865 ]

the wl pointer can be null In case only wlcore_sdio is probed while
no WiLink module is successfully probed, as in the case of mounting a
wl12xx module while using a device tree file configured with wl18xx
related settings.
In this case the system was crashing in wl1271_suspend() as platform
device data is not set.
Make sure wl the pointer is valid before using it.

Signed-off-by: Eyal Reizer <eyalr@ti.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ti/wlcore/sdio.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index c172da56b550..e4a8280cea83 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -388,6 +388,11 @@ static int wl1271_suspend(struct device *dev)
 	mmc_pm_flag_t sdio_flags;
 	int ret = 0;
 
+	if (!wl) {
+		dev_err(dev, "no wilink module was probed\n");
+		goto out;
+	}
+
 	dev_dbg(dev, "wl1271 suspend. wow_enabled: %d\n",
 		wl->wow_enabled);
 

From 46431d9c28f6859f8e568ac7db92137f1da31100 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 21 May 2018 08:43:02 -0400
Subject: [PATCH 165/519] media: videobuf2-core: don't call memop 'finish' when
 queueing

[ Upstream commit 90b2da89a083e1395cb322521a42397c49ae4500 ]

When a buffer is queued or requeued in vb2_buffer_done, then don't
call the finish memop. In this case the buffer is only returned to vb2,
not to userspace.

Calling 'finish' here will cause an unbalance when the queue is
canceled, since the core will call the same memop again.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-core.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index bb1e19f7ed5a..0c1a42bf27fd 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -870,9 +870,12 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 	dprintk(4, "done processing on buffer %d, state: %d\n",
 			vb->index, state);
 
-	/* sync buffers */
-	for (plane = 0; plane < vb->num_planes; ++plane)
-		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
+	if (state != VB2_BUF_STATE_QUEUED &&
+	    state != VB2_BUF_STATE_REQUEUEING) {
+		/* sync buffers */
+		for (plane = 0; plane < vb->num_planes; ++plane)
+			call_void_memop(vb, finish, vb->planes[plane].mem_priv);
+	}
 
 	spin_lock_irqsave(&q->done_lock, flags);
 	if (state == VB2_BUF_STATE_QUEUED ||

From 082c9832168598ee825894f126c66476ee8be8ac Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 24 Apr 2018 14:53:56 +0200
Subject: [PATCH 166/519] btrfs: add barriers to btrfs_sync_log before
 log_commit_wait wakeups

[ Upstream commit 3d3a2e610ea5e7c6d4f9481ecce5d8e2d8317843 ]

Currently the code assumes that there's an implied barrier by the
sequence of code preceding the wakeup, namely the mutex unlock.

As Nikolay pointed out:

I think this is wrong (not your code) but the original assumption that
the RELEASE semantics provided by mutex_unlock is sufficient.
According to memory-barriers.txt:

Section 'LOCK ACQUISITION FUNCTIONS' states:

 (2) RELEASE operation implication:

     Memory operations issued before the RELEASE will be completed before the
     RELEASE operation has completed.

     Memory operations issued after the RELEASE *may* be completed before the
     RELEASE operation has completed.

(I've bolded the may portion)

The example given there:

As an example, consider the following:

    *A = a;
    *B = b;
    ACQUIRE
    *C = c;
    *D = d;
    RELEASE
    *E = e;
    *F = f;

The following sequence of events is acceptable:

    ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE

So if we assume that *C is modifying the flag which the waitqueue is checking,
and *E is the actual wakeup, then those accesses can be re-ordered...

IMHO this code should be considered broken...
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 738f5d6beb95..2c7f9a5f8717 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2961,8 +2961,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	mutex_unlock(&log_root_tree->log_mutex);
 
 	/*
-	 * The barrier before waitqueue_active is implied by mutex_unlock
+	 * The barrier before waitqueue_active is needed so all the updates
+	 * above are seen by the woken threads. It might not be necessary, but
+	 * proving that seems to be hard.
 	 */
+	smp_mb();
 	if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
 		wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
@@ -2973,8 +2976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	mutex_unlock(&root->log_mutex);
 
 	/*
-	 * The barrier before waitqueue_active is implied by mutex_unlock
+	 * The barrier before waitqueue_active is needed so all the updates
+	 * above are seen by the woken threads. It might not be necessary, but
+	 * proving that seems to be hard.
 	 */
+	smp_mb();
 	if (waitqueue_active(&root->log_commit_wait[index1]))
 		wake_up(&root->log_commit_wait[index1]);
 	return ret;

From 40e082b99a1eb935dab65a869593617407483016 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 14 May 2018 09:38:13 +0800
Subject: [PATCH 167/519] btrfs: qgroup: Finish rescan when hit the last leaf
 of extent tree

[ Upstream commit ff3d27a048d926b3920ccdb75d98788c567cae0d ]

Under the following case, qgroup rescan can double account cowed tree
blocks:

In this case, extent tree only has one tree block.

-
| transid=5 last committed=4
| btrfs_qgroup_rescan_worker()
| |- btrfs_start_transaction()
| |  transid = 5
| |- qgroup_rescan_leaf()
|    |- btrfs_search_slot_for_read() on extent tree
|       Get the only extent tree block from commit root (transid = 4).
|       Scan it, set qgroup_rescan_progress to the last
|       EXTENT/META_ITEM + 1
|       now qgroup_rescan_progress = A + 1.
|
| fs tree get CoWed, new tree block is at A + 16K
| transid 5 get committed
-
| transid=6 last committed=5
| btrfs_qgroup_rescan_worker()
| btrfs_qgroup_rescan_worker()
| |- btrfs_start_transaction()
| |  transid = 5
| |- qgroup_rescan_leaf()
|    |- btrfs_search_slot_for_read() on extent tree
|       Get the only extent tree block from commit root (transid = 5).
|       scan it using qgroup_rescan_progress (A + 1).
|       found new tree block beyong A, and it's fs tree block,
|       account it to increase qgroup numbers.
-

In above case, tree block A, and tree block A + 16K get accounted twice,
while qgroup rescan should stop when it already reach the last leaf,
other than continue using its qgroup_rescan_progress.

Such case could happen by just looping btrfs/017 and with some
possibility it can hit such double qgroup accounting problem.

Fix it by checking the path to determine if we should finish qgroup
rescan, other than relying on next loop to exit.

Reported-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/qgroup.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 88d9b66e2207..a751937dded5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2185,6 +2185,21 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 	BUG();
 }
 
+/*
+ * Check if the leaf is the last leaf. Which means all node pointers
+ * are at their last position.
+ */
+static bool is_last_leaf(struct btrfs_path *path)
+{
+	int i;
+
+	for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+		if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
+			return false;
+	}
+	return true;
+}
+
 /*
  * returns < 0 on error, 0 when more leafs are to be scanned.
  * returns 1 when done.
@@ -2198,6 +2213,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	struct ulist *roots = NULL;
 	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
 	u64 num_bytes;
+	bool done;
 	int slot;
 	int ret;
 
@@ -2225,6 +2241,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 		mutex_unlock(&fs_info->qgroup_rescan_lock);
 		return ret;
 	}
+	done = is_last_leaf(path);
 
 	btrfs_item_key_to_cpu(path->nodes[0], &found,
 			      btrfs_header_nritems(path->nodes[0]) - 1);
@@ -2271,6 +2288,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	}
 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 
+	if (done && !ret)
+		ret = 1;
 	return ret;
 }
 

From 549f4ee6884758a683ab1e0245987a8a7a38563a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 18 May 2018 18:56:24 +0200
Subject: [PATCH 168/519] PCI: Prevent sysfs disable of device while driver is
 attached

[ Upstream commit 6f5cdfa802733dcb561bf664cc89d203f2fd958f ]

Manipulating the enable_cnt behind the back of the driver will wreak
complete havoc with the kernel state, so disallow it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-sysfs.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index ec91cd17bf34..5fb4ed6ea322 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -180,13 +180,16 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!val) {
-		if (pci_is_enabled(pdev))
-			pci_disable_device(pdev);
-		else
-			result = -EIO;
-	} else
+	device_lock(dev);
+	if (dev->driver)
+		result = -EBUSY;
+	else if (val)
 		result = pci_enable_device(pdev);
+	else if (pci_is_enabled(pdev))
+		pci_disable_device(pdev);
+	else
+		result = -EIO;
+	device_unlock(dev);
 
 	return result < 0 ? result : count;
 }

From da1602d2d95f857cfbc8b4b59c3da2a9988fc38d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:30 +0300
Subject: [PATCH 169/519] ath: Add regulatory mapping for FCC3_ETSIC

[ Upstream commit 01fb2994a98dc72c8818c274f7b5983d5dd885c7 ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index bdd2b4d61f2f..7d955fa8c24c 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -35,6 +35,7 @@ enum EnumRd {
 	FRANCE_RES = 0x31,
 	FCC3_FCCA = 0x3A,
 	FCC3_WORLD = 0x3B,
+	FCC3_ETSIC = 0x3F,
 
 	ETSI1_WORLD = 0x37,
 	ETSI3_ETSIA = 0x32,
@@ -168,6 +169,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{FCC2_ETSIC, CTL_FCC, CTL_ETSI},
 	{FCC3_FCCA, CTL_FCC, CTL_FCC},
 	{FCC3_WORLD, CTL_FCC, CTL_ETSI},
+	{FCC3_ETSIC, CTL_FCC, CTL_ETSI},
 	{FCC4_FCCA, CTL_FCC, CTL_FCC},
 	{FCC5_FCCA, CTL_FCC, CTL_FCC},
 	{FCC6_FCCA, CTL_FCC, CTL_FCC},

From a64815422466d95561c7dd68fec26b693e5893f1 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:18 +0300
Subject: [PATCH 170/519] ath: Add regulatory mapping for ETSI8_WORLD

[ Upstream commit 45faf6e096da8bb80e1ddf8c08a26a9601d9469e ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: ETSI
* 5GHz: ETSI

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 7d955fa8c24c..7c0fcbbf1900 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -45,6 +45,7 @@ enum EnumRd {
 	ETSI4_ETSIC = 0x38,
 	ETSI5_WORLD = 0x39,
 	ETSI6_WORLD = 0x34,
+	ETSI8_WORLD = 0x3D,
 	ETSI_RESERVED = 0x33,
 
 	MKK1_MKKA = 0x40,
@@ -181,6 +182,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{ETSI4_WORLD, CTL_ETSI, CTL_ETSI},
 	{ETSI5_WORLD, CTL_ETSI, CTL_ETSI},
 	{ETSI6_WORLD, CTL_ETSI, CTL_ETSI},
+	{ETSI8_WORLD, CTL_ETSI, CTL_ETSI},
 
 	/* XXX: For ETSI3_ETSIA, Was NO_CTL meant for the 2 GHz band ? */
 	{ETSI3_ETSIA, CTL_ETSI, CTL_ETSI},

From beb2347e3f7a9757cb43a85ae3b63f154b88e060 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:14 +0300
Subject: [PATCH 171/519] ath: Add regulatory mapping for APL13_WORLD

[ Upstream commit 9ba8df0c52b3e6baa436374b429d3d73bd09a320 ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: ETSI
* 5GHz: ETSI

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 7c0fcbbf1900..2c873840a46a 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -69,6 +69,7 @@ enum EnumRd {
 	APL1_ETSIC = 0x55,
 	APL2_ETSIC = 0x56,
 	APL5_WORLD = 0x58,
+	APL13_WORLD = 0x5A,
 	APL6_WORLD = 0x5B,
 	APL7_FCCA = 0x5C,
 	APL8_WORLD = 0x5D,
@@ -195,6 +196,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{APL3_WORLD, CTL_FCC, CTL_ETSI},
 	{APL4_WORLD, CTL_FCC, CTL_ETSI},
 	{APL5_WORLD, CTL_FCC, CTL_ETSI},
+	{APL13_WORLD, CTL_ETSI, CTL_ETSI},
 	{APL6_WORLD, CTL_ETSI, CTL_ETSI},
 	{APL8_WORLD, CTL_ETSI, CTL_ETSI},
 	{APL9_WORLD, CTL_ETSI, CTL_ETSI},

From ca1d2c2d4087dfe59d90b314c465d6038799fc28 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:11:05 +0300
Subject: [PATCH 172/519] ath: Add regulatory mapping for APL2_FCCA

[ Upstream commit 4f183687e3fad3ce0e06e38976cad81bc4541990 ]

The regdomain code is used to select the correct the correct conformance
test limits (CTL) for a country. If the regdomain code isn't available and
it is still programmed in the EEPROM then it will cause an error and stop
the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this regdomain code are:

* 2.4GHz: FCC
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd_common.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 2c873840a46a..d8a7db4976f0 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -61,6 +61,7 @@ enum EnumRd {
 	MKK1_MKKA1 = 0x4A,
 	MKK1_MKKA2 = 0x4B,
 	MKK1_MKKC = 0x4C,
+	APL2_FCCA = 0x4D,
 
 	APL3_FCCA = 0x50,
 	APL1_WORLD = 0x52,
@@ -193,6 +194,7 @@ static struct reg_dmn_pair_mapping regDomainPairs[] = {
 	{FCC1_FCCA, CTL_FCC, CTL_FCC},
 	{APL1_WORLD, CTL_FCC, CTL_ETSI},
 	{APL2_WORLD, CTL_FCC, CTL_ETSI},
+	{APL2_FCCA, CTL_FCC, CTL_FCC},
 	{APL3_WORLD, CTL_FCC, CTL_ETSI},
 	{APL4_WORLD, CTL_FCC, CTL_ETSI},
 	{APL5_WORLD, CTL_FCC, CTL_ETSI},

From b90059eadf8c4bdcf58ee4ac309eb9f412643a26 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:10:54 +0300
Subject: [PATCH 173/519] ath: Add regulatory mapping for Uganda

[ Upstream commit 1ea3986ad2bc72081c69f3fbc1e5e0eeb3c44f17 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 37f53bd8fcb1..18a4d0557f18 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -175,6 +175,7 @@ enum CountryCode {
 	CTRY_TUNISIA = 788,
 	CTRY_TURKEY = 792,
 	CTRY_UAE = 784,
+	CTRY_UGANDA = 800,
 	CTRY_UKRAINE = 804,
 	CTRY_UNITED_KINGDOM = 826,
 	CTRY_UNITED_STATES = 840,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index d8a7db4976f0..cba1020bc854 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -467,6 +467,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"},
 	{CTRY_TUNISIA, ETSI3_WORLD, "TN"},
 	{CTRY_TURKEY, ETSI3_WORLD, "TR"},
+	{CTRY_UGANDA, FCC3_WORLD, "UG"},
 	{CTRY_UKRAINE, NULL1_WORLD, "UA"},
 	{CTRY_UAE, NULL1_WORLD, "AE"},
 	{CTRY_UNITED_KINGDOM, ETSI1_WORLD, "GB"},

From 0ac4e043466f5f3916e0bd3dacbf440ebacc9f61 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:10:48 +0300
Subject: [PATCH 174/519] ath: Add regulatory mapping for Tanzania

[ Upstream commit 667ddac5745fb9fddfe8f7fd2523070f50bd4442 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 18a4d0557f18..7439e62f5262 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -170,6 +170,7 @@ enum CountryCode {
 	CTRY_SWITZERLAND = 756,
 	CTRY_SYRIA = 760,
 	CTRY_TAIWAN = 158,
+	CTRY_TANZANIA = 834,
 	CTRY_THAILAND = 764,
 	CTRY_TRINIDAD_Y_TOBAGO = 780,
 	CTRY_TUNISIA = 788,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index cba1020bc854..b85dc86cc188 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -463,6 +463,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_SWITZERLAND, ETSI1_WORLD, "CH"},
 	{CTRY_SYRIA, NULL1_WORLD, "SY"},
 	{CTRY_TAIWAN, APL3_FCCA, "TW"},
+	{CTRY_TANZANIA, APL1_WORLD, "TZ"},
 	{CTRY_THAILAND, FCC3_WORLD, "TH"},
 	{CTRY_TRINIDAD_Y_TOBAGO, FCC3_WORLD, "TT"},
 	{CTRY_TUNISIA, ETSI3_WORLD, "TN"},

From 49ad264237d82059109e982e3d35966edd51fef9 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:10:43 +0300
Subject: [PATCH 175/519] ath: Add regulatory mapping for Serbia

[ Upstream commit 2a3169a54bb53717928392a04fb84deb765b51f1 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: ETSI

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index 7439e62f5262..a0ed5f8b554e 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -159,6 +159,7 @@ enum CountryCode {
 	CTRY_ROMANIA = 642,
 	CTRY_RUSSIA = 643,
 	CTRY_SAUDI_ARABIA = 682,
+	CTRY_SERBIA = 688,
 	CTRY_SERBIA_MONTENEGRO = 891,
 	CTRY_SINGAPORE = 702,
 	CTRY_SLOVAKIA = 703,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index b85dc86cc188..1ced5a323cf8 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -452,6 +452,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_ROMANIA, NULL1_WORLD, "RO"},
 	{CTRY_RUSSIA, NULL1_WORLD, "RU"},
 	{CTRY_SAUDI_ARABIA, NULL1_WORLD, "SA"},
+	{CTRY_SERBIA, ETSI1_WORLD, "RS"},
 	{CTRY_SERBIA_MONTENEGRO, ETSI1_WORLD, "CS"},
 	{CTRY_SINGAPORE, APL6_WORLD, "SG"},
 	{CTRY_SLOVAKIA, ETSI1_WORLD, "SK"},

From d35dcd927fa5643183c6a5ca9c086688857b6763 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:09:59 +0300
Subject: [PATCH 176/519] ath: Add regulatory mapping for Bermuda

[ Upstream commit 9c790f2d234f65697e3b0948adbfdf36dbe63dd7 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: FCC
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index a0ed5f8b554e..a9334d514154 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -74,6 +74,7 @@ enum CountryCode {
 	CTRY_BELARUS = 112,
 	CTRY_BELGIUM = 56,
 	CTRY_BELIZE = 84,
+	CTRY_BERMUDA = 60,
 	CTRY_BOLIVIA = 68,
 	CTRY_BOSNIA_HERZ = 70,
 	CTRY_BRAZIL = 76,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index 1ced5a323cf8..e13b96e45d53 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -313,6 +313,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_BELGIUM, ETSI1_WORLD, "BE"},
 	{CTRY_BELGIUM2, ETSI4_WORLD, "BL"},
 	{CTRY_BELIZE, APL1_ETSIC, "BZ"},
+	{CTRY_BERMUDA, FCC3_FCCA, "BM"},
 	{CTRY_BOLIVIA, APL1_ETSIC, "BO"},
 	{CTRY_BOSNIA_HERZ, ETSI1_WORLD, "BA"},
 	{CTRY_BRAZIL, FCC3_WORLD, "BR"},

From 8b299e29871ebafa9b5d1b8b30394f7c853445d4 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@openmesh.com>
Date: Wed, 23 May 2018 11:09:53 +0300
Subject: [PATCH 177/519] ath: Add regulatory mapping for Bahamas

[ Upstream commit 699e2302c286a14afe7b7394151ce6c4e1790cc1 ]

The country code is used by the ath to detect the ISO 3166-1 alpha-2 name
and to select the correct conformance test limits (CTL) for a country. If
the country isn't available and it is still programmed in the EEPROM then
it will cause an error and stop the initialization with:

  Invalid EEPROM contents

The current CTL mappings for this country are:

* 2.4GHz: ETSI
* 5GHz: FCC

Signed-off-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/regd.h        | 1 +
 drivers/net/wireless/ath/regd_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index a9334d514154..184b6810cde9 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -68,6 +68,7 @@ enum CountryCode {
 	CTRY_AUSTRALIA = 36,
 	CTRY_AUSTRIA = 40,
 	CTRY_AZERBAIJAN = 31,
+	CTRY_BAHAMAS = 44,
 	CTRY_BAHRAIN = 48,
 	CTRY_BANGLADESH = 50,
 	CTRY_BARBADOS = 52,
diff --git a/drivers/net/wireless/ath/regd_common.h b/drivers/net/wireless/ath/regd_common.h
index e13b96e45d53..15bbd1e0d912 100644
--- a/drivers/net/wireless/ath/regd_common.h
+++ b/drivers/net/wireless/ath/regd_common.h
@@ -306,6 +306,7 @@ static struct country_code_to_enum_rd allCountries[] = {
 	{CTRY_AUSTRALIA2, FCC6_WORLD, "AU"},
 	{CTRY_AUSTRIA, ETSI1_WORLD, "AT"},
 	{CTRY_AZERBAIJAN, ETSI4_WORLD, "AZ"},
+	{CTRY_BAHAMAS, FCC3_WORLD, "BS"},
 	{CTRY_BAHRAIN, APL6_WORLD, "BH"},
 	{CTRY_BANGLADESH, NULL1_WORLD, "BD"},
 	{CTRY_BARBADOS, FCC2_WORLD, "BB"},

From 753c9c38e28728cdfd8a3e4031f04823c660e960 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:20:03 +0100
Subject: [PATCH 178/519] powerpc/32: Add a missing include header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit c89ca593220931c150cffda24b4d4ccf82f13fc8 ]

The header file <linux/syscalls.h> was missing from the includes. Fix the
following warning, treated as error with W=1:

  arch/powerpc/kernel/pci_32.c:286:6: error: no previous prototype for ‘sys_pciconfig_iobase’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/pci_32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 1f7930037cb7..d9e41b77dd13 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/bootmem.h>
+#include <linux/syscalls.h>
 #include <linux/irq.h>
 #include <linux/list.h>
 #include <linux/of.h>

From c2d11a8dc4ddae672ed5d6b23179ef265ac24692 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 22 Mar 2018 21:19:56 +0100
Subject: [PATCH 179/519] powerpc/chrp/time: Make some functions static, add
 missing header include
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit b87a358b4a1421abd544c0b554b1b7159b2b36c0 ]

Add a missing include <platforms/chrp/chrp.h>.

These functions can all be static, make it so. Fix warnings treated as
errors with W=1:

  arch/powerpc/platforms/chrp/time.c:41:13: error: no previous prototype for ‘chrp_time_init’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:66:5: error: no previous prototype for ‘chrp_cmos_clock_read’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:74:6: error: no previous prototype for ‘chrp_cmos_clock_write’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:86:5: error: no previous prototype for ‘chrp_set_rtc_time’ [-Werror=missing-prototypes]
  arch/powerpc/platforms/chrp/time.c:130:6: error: no previous prototype for ‘chrp_get_rtc_time’ [-Werror=missing-prototypes]

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/chrp/time.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
index f803f4b8ab6f..8608e358217f 100644
--- a/arch/powerpc/platforms/chrp/time.c
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -27,6 +27,8 @@
 #include <asm/sections.h>
 #include <asm/time.h>
 
+#include <platforms/chrp/chrp.h>
+
 extern spinlock_t rtc_lock;
 
 #define NVRAM_AS0  0x74
@@ -62,7 +64,7 @@ long __init chrp_time_init(void)
 	return 0;
 }
 
-int chrp_cmos_clock_read(int addr)
+static int chrp_cmos_clock_read(int addr)
 {
 	if (nvram_as1 != 0)
 		outb(addr>>8, nvram_as1);
@@ -70,7 +72,7 @@ int chrp_cmos_clock_read(int addr)
 	return (inb(nvram_data));
 }
 
-void chrp_cmos_clock_write(unsigned long val, int addr)
+static void chrp_cmos_clock_write(unsigned long val, int addr)
 {
 	if (nvram_as1 != 0)
 		outb(addr>>8, nvram_as1);

From dd76f0988378d6e09879435b0b27d3426e767419 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:13:05 +0200
Subject: [PATCH 180/519] powerpc/powermac: Add missing prototype for
 note_bootable_part()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit f72cf3f1d49f2c35d6cb682af2e8c93550f264e4 ]

Add a missing prototype for function `note_bootable_part` to silence a
warning treated as error with W=1:

  arch/powerpc/platforms/powermac/setup.c:361:12: error: no previous prototype for ‘note_bootable_part’ [-Werror=missing-prototypes]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powermac/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 8dd78f4e1af4..32fc56cf6261 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -359,6 +359,7 @@ static int pmac_late_init(void)
 }
 machine_late_initcall(powermac, pmac_late_init);
 
+void note_bootable_part(dev_t dev, int part, int goodness);
 /*
  * This is __init_refok because we check for "initializing" before
  * touching any of the __init sensitive things and "initializing"

From d3456beb1aab696ed7cae61183d0fd148d9d4f91 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 4 Apr 2018 22:07:46 +0200
Subject: [PATCH 181/519] powerpc/powermac: Mark variable x as unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 5a4b475cf8511da721f20ba432c244061db7139f ]

Since the value of x is never intended to be read, declare it with gcc
attribute as unused. Fix warning treated as error with W=1:

  arch/powerpc/platforms/powermac/bootx_init.c:471:21: error: variable ‘x’ set but not used [-Werror=unused-but-set-variable]

Suggested-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/powermac/bootx_init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 76f5013c35e5..89237b84b096 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -467,7 +467,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	boot_infos_t *bi = (boot_infos_t *) r4;
 	unsigned long hdr;
 	unsigned long space;
-	unsigned long ptr, x;
+	unsigned long ptr;
 	char *model;
 	unsigned long offset = reloc_offset();
 
@@ -561,6 +561,8 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	 * MMU switched OFF, so this should not be useful anymore.
 	 */
 	if (bi->version < 4) {
+		unsigned long x __maybe_unused;
+
 		bootx_printf("Touching pages...\n");
 
 		/*

From a1fba0a39100549444ff4cc031ed927d3c0d462e Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 24 May 2018 11:02:06 +0000
Subject: [PATCH 182/519] powerpc/8xx: fix invalid register expression in
 head_8xx.S

[ Upstream commit e4ccb1dae6bdef228d729c076c38161ef6e7ca34 ]

New binutils generate the following warning

  AS      arch/powerpc/kernel/head_8xx.o
arch/powerpc/kernel/head_8xx.S: Assembler messages:
arch/powerpc/kernel/head_8xx.S:916: Warning: invalid register expression

This patch fixes it.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/head_8xx.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 78c1eba4c04a..01e274e6907b 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -720,7 +720,7 @@ start_here:
 	tovirt(r6,r6)
 	lis	r5, abatron_pteptrs@h
 	ori	r5, r5, abatron_pteptrs@l
-	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
+	stw	r5, 0xf0(0)	/* Must match your Abatron config file */
 	tophys(r5,r5)
 	stw	r6, 0(r5)
 

From b6cfbfc127ca8b897b7434d52e3c67f2424695bd Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 23 May 2018 21:07:12 +0200
Subject: [PATCH 183/519] pinctrl: at91-pio4: add missing of_node_put

[ Upstream commit 21816364715f508c10da1e087e352bc1e326614f ]

The device node iterators perform an of_node_get on each iteration, so a
jump out of the loop requires an of_node_put.

The semantic patch that fixes this problem is as follows
(http://coccinelle.lip6.fr):

// <smpl>
@@
expression root,e;
local idexpression child;
iterator name for_each_child_of_node;
@@

 for_each_child_of_node(root, child) {
   ... when != of_node_put(child)
       when != e = child
+  of_node_put(child);
?  break;
   ...
}
... when != child
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/pinctrl-at91-pio4.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 271cca63e9bd..9aa82a4e9e25 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -568,8 +568,10 @@ static int atmel_pctl_dt_node_to_map(struct pinctrl_dev *pctldev,
 		for_each_child_of_node(np_config, np) {
 			ret = atmel_pctl_dt_subnode_to_map(pctldev, np, map,
 						    &reserved_maps, num_maps);
-			if (ret < 0)
+			if (ret < 0) {
+				of_node_put(np);
 				break;
+			}
 		}
 	}
 

From c51df1153c835b0b6b1b436e0394f0e76758bc59 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 23 May 2018 17:19:22 -0500
Subject: [PATCH 184/519] PCI: pciehp: Request control of native hotplug only
 if supported

[ Upstream commit 408fec36a1ab3d14273c2116b449ef1e9be3cb8b ]

Currently we request control of native PCIe hotplug unconditionally.
Native PCIe hotplug events are handled by the pciehp driver, and if it is
not enabled those events will be lost.

Request control of native PCIe hotplug only if the pciehp driver is
enabled, so we will actually handle native PCIe hotplug events.

Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/pci_root.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index ae3fe4e64203..3b0b4bd67b71 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -472,9 +472,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm)
 	}
 
 	control = OSC_PCI_EXPRESS_CAPABILITY_CONTROL
-		| OSC_PCI_EXPRESS_NATIVE_HP_CONTROL
 		| OSC_PCI_EXPRESS_PME_CONTROL;
 
+	if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+		control |= OSC_PCI_EXPRESS_NATIVE_HP_CONTROL;
+
 	if (pci_aer_available()) {
 		if (aer_acpi_firmware_first())
 			dev_info(&device->dev,

From 5c174f95e7ded3f492d05425a205420dad0f7926 Mon Sep 17 00:00:00 2001
From: Xinming Hu <huxm@marvell.com>
Date: Fri, 18 May 2018 15:38:54 +0800
Subject: [PATCH 185/519] mwifiex: correct histogram data with appropriate
 index

[ Upstream commit 30bfce0b63fa68c14ae1613eb9d259fa18644074 ]

Correct snr/nr/rssi data index to avoid possible buffer underflow.

Signed-off-by: Xinming Hu <huxm@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/util.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mwifiex/util.c b/drivers/net/wireless/mwifiex/util.c
index 0cec8a64473e..eb5ffa5b1c6c 100644
--- a/drivers/net/wireless/mwifiex/util.c
+++ b/drivers/net/wireless/mwifiex/util.c
@@ -702,12 +702,14 @@ void mwifiex_hist_data_set(struct mwifiex_private *priv, u8 rx_rate, s8 snr,
 			   s8 nflr)
 {
 	struct mwifiex_histogram_data *phist_data = priv->hist_data;
+	s8 nf   = -nflr;
+	s8 rssi = snr - nflr;
 
 	atomic_inc(&phist_data->num_samples);
 	atomic_inc(&phist_data->rx_rate[rx_rate]);
-	atomic_inc(&phist_data->snr[snr]);
-	atomic_inc(&phist_data->noise_flr[128 + nflr]);
-	atomic_inc(&phist_data->sig_str[nflr - snr]);
+	atomic_inc(&phist_data->snr[snr + 128]);
+	atomic_inc(&phist_data->noise_flr[nf + 128]);
+	atomic_inc(&phist_data->sig_str[rssi + 128]);
 }
 
 /* function to reset histogram data during init/reset */

From 3bdb17c1bf1f9414065f997873d01cfee17dceee Mon Sep 17 00:00:00 2001
From: Maya Erez <merez@codeaurora.org>
Date: Thu, 3 May 2018 16:37:16 +0530
Subject: [PATCH 186/519] scsi: ufs: fix exception event handling

[ Upstream commit 2e3611e9546c2ed4def152a51dfd34e8dddae7a5 ]

The device can set the exception event bit in one of the response UPIU,
for example to notify the need for urgent BKOPs operation.  In such a
case, the host driver calls ufshcd_exception_event_handler to handle
this notification.  When trying to check the exception event status (for
finding the cause for the exception event), the device may be busy with
additional SCSI commands handling and may not respond within the 100ms
timeout.

To prevent that, we need to block SCSI commands during handling of
exception events and allow retransmissions of the query requests, in
case of timeout.

Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Asutosh Das <asutoshd@codeaurora.org>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufshcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 18f26cf1e24d..8c58adadb728 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3447,6 +3447,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
 	hba = container_of(work, struct ufs_hba, eeh_work);
 
 	pm_runtime_get_sync(hba->dev);
+	scsi_block_requests(hba->host);
 	err = ufshcd_get_ee_status(hba, &status);
 	if (err) {
 		dev_err(hba->dev, "%s: failed to get exception status %d\n",
@@ -3462,6 +3463,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
 					__func__, err);
 	}
 out:
+	scsi_unblock_requests(hba->host);
 	pm_runtime_put_sync(hba->dev);
 	return;
 }

From c1da6e315e1b00867c0a0c900b3549c5f940aea9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 17 May 2018 20:02:23 +0200
Subject: [PATCH 187/519] ALSA: emu10k1: Rate-limit error messages about page
 errors

[ Upstream commit 11d42c81036324697d367600bfc16f6dd37636fd ]

The error messages at sanity checks of memory pages tend to repeat too
many times once when it hits, and without the rate limit, it may flood
and become unreadable.  Replace such messages with the *_ratelimited()
variant.

Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1093027
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/emu10k1/memory.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c
index 4f1f69be1865..8c778fa33031 100644
--- a/sound/pci/emu10k1/memory.c
+++ b/sound/pci/emu10k1/memory.c
@@ -237,13 +237,13 @@ search_empty(struct snd_emu10k1 *emu, int size)
 static int is_valid_page(struct snd_emu10k1 *emu, dma_addr_t addr)
 {
 	if (addr & ~emu->dma_mask) {
-		dev_err(emu->card->dev,
+		dev_err_ratelimited(emu->card->dev,
 			"max memory size is 0x%lx (addr = 0x%lx)!!\n",
 			emu->dma_mask, (unsigned long)addr);
 		return 0;
 	}
 	if (addr & (EMUPAGESIZE-1)) {
-		dev_err(emu->card->dev, "page is not aligned\n");
+		dev_err_ratelimited(emu->card->dev, "page is not aligned\n");
 		return 0;
 	}
 	return 1;
@@ -334,7 +334,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst
 		else
 			addr = snd_pcm_sgbuf_get_addr(substream, ofs);
 		if (! is_valid_page(emu, addr)) {
-			dev_err(emu->card->dev,
+			dev_err_ratelimited(emu->card->dev,
 				"emu: failure page = %d\n", idx);
 			mutex_unlock(&hdr->block_mutex);
 			return NULL;

From 9f543747d4b460b418fa29cb87d26258af96e6ae Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Thu, 17 May 2018 15:27:22 +0800
Subject: [PATCH 188/519] regulator: pfuze100: add .is_enable() for
 pfuze100_swb_regulator_ops

[ Upstream commit 0b01fd3d40fe6402e5fa3b491ef23109feb1aaa5 ]

If is_enabled() is not defined, regulator core will assume
this regulator is already enabled, then it can NOT be really
enabled after disabled.

Based on Li Jun's patch from the NXP kernel tree.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/pfuze100-regulator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
index 2a44e5dd9c2a..c68556bf6f39 100644
--- a/drivers/regulator/pfuze100-regulator.c
+++ b/drivers/regulator/pfuze100-regulator.c
@@ -152,6 +152,7 @@ static struct regulator_ops pfuze100_sw_regulator_ops = {
 static struct regulator_ops pfuze100_swb_regulator_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = regulator_list_voltage_table,
 	.map_voltage = regulator_map_voltage_ascend,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,

From c356cd64f01667cadf12d4fa0008164207bf533e Mon Sep 17 00:00:00 2001
From: Yufen Yu <yuyufen@huawei.com>
Date: Fri, 4 May 2018 18:08:10 +0800
Subject: [PATCH 189/519] md: fix NULL dereference of mddev->pers in
 remove_and_add_spares()

[ Upstream commit c42a0e2675721e1444f56e6132a07b7b1ec169ac ]

We met NULL pointer BUG as follow:

[  151.760358] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060
[  151.761340] PGD 80000001011eb067 P4D 80000001011eb067 PUD 1011ea067 PMD 0
[  151.762039] Oops: 0000 [#1] SMP PTI
[  151.762406] Modules linked in:
[  151.762723] CPU: 2 PID: 3561 Comm: mdadm-test Kdump: loaded Not tainted 4.17.0-rc1+ #238
[  151.763542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
[  151.764432] RIP: 0010:remove_and_add_spares.part.56+0x13c/0x3a0
[  151.765061] RSP: 0018:ffffc90001d7fcd8 EFLAGS: 00010246
[  151.765590] RAX: 0000000000000000 RBX: ffff88013601d600 RCX: 0000000000000000
[  151.766306] RDX: 0000000000000000 RSI: ffff88013601d600 RDI: ffff880136187000
[  151.767014] RBP: ffff880136187018 R08: 0000000000000003 R09: 0000000000000051
[  151.767728] R10: ffffc90001d7fed8 R11: 0000000000000000 R12: ffff88013601d600
[  151.768447] R13: ffff8801298b1300 R14: ffff880136187000 R15: 0000000000000000
[  151.769160] FS:  00007f2624276700(0000) GS:ffff88013ae80000(0000) knlGS:0000000000000000
[  151.769971] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  151.770554] CR2: 0000000000000060 CR3: 0000000111aac000 CR4: 00000000000006e0
[  151.771272] Call Trace:
[  151.771542]  md_ioctl+0x1df2/0x1e10
[  151.771906]  ? __switch_to+0x129/0x440
[  151.772295]  ? __schedule+0x244/0x850
[  151.772672]  blkdev_ioctl+0x4bd/0x970
[  151.773048]  block_ioctl+0x39/0x40
[  151.773402]  do_vfs_ioctl+0xa4/0x610
[  151.773770]  ? dput.part.23+0x87/0x100
[  151.774151]  ksys_ioctl+0x70/0x80
[  151.774493]  __x64_sys_ioctl+0x16/0x20
[  151.774877]  do_syscall_64+0x5b/0x180
[  151.775258]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

For raid6, when two disk of the array are offline, two spare disks can
be added into the array. Before spare disks recovery completing,
system reboot and mdadm thinks it is ok to restart the degraded
array by md_ioctl(). Since disks in raid6 is not only_parity(),
raid5_run() will abort, when there is no PPL feature or not setting
'start_dirty_degraded' parameter. Therefore, mddev->pers is NULL.

But, mddev->raid_disks has been set and it will not be cleared when
raid5_run abort. md_ioctl() can execute cmd 'HOT_REMOVE_DISK' to
remove a disk by mdadm, which will cause NULL pointer dereference
in remove_and_add_spares() finally.

Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0663463df2f7..07f307402351 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6145,6 +6145,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
 	struct md_rdev *rdev;
 	int ret = -1;
 
+	if (!mddev->pers)
+		return -ENODEV;
+
 	rdev = find_rdev(mddev, dev);
 	if (!rdev)
 		return -ENXIO;

From 199b0431d563fd22ded0455e8bf4cd28c839fe7d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 25 Apr 2018 11:04:21 -0400
Subject: [PATCH 190/519] media: smiapp: fix timeout checking in
 smiapp_read_nvm

[ Upstream commit 7a2148dfda8001c983f0effd9afd8a7fa58e99c4 ]

The current code decrements the timeout counter i and the end of
each loop i is incremented, so the check for timeout will always
be false and hence the timeout mechanism is just a dead code path.
Potentially, if the RD_READY bit is not set, we could end up in
an infinite loop.

Fix this so the timeout starts from 1000 and decrements to zero,
if at the end of the loop i is zero we have a timeout condition.

Detected by CoverityScan, CID#1324008 ("Logically dead code")

Fixes: ccfc97bdb5ae ("[media] smiapp: Add driver")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/i2c/smiapp/smiapp-core.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c
index fb39dfd55e75..46a052c5be2e 100644
--- a/drivers/media/i2c/smiapp/smiapp-core.c
+++ b/drivers/media/i2c/smiapp/smiapp-core.c
@@ -981,7 +981,7 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor,
 		if (rval)
 			goto out;
 
-		for (i = 0; i < 1000; i++) {
+		for (i = 1000; i > 0; i--) {
 			rval = smiapp_read(
 				sensor,
 				SMIAPP_REG_U8_DATA_TRANSFER_IF_1_STATUS, &s);
@@ -992,11 +992,10 @@ static int smiapp_read_nvm(struct smiapp_sensor *sensor,
 			if (s & SMIAPP_DATA_TRANSFER_IF_1_STATUS_RD_READY)
 				break;
 
-			if (--i == 0) {
-				rval = -ETIMEDOUT;
-				goto out;
-			}
-
+		}
+		if (!i) {
+			rval = -ETIMEDOUT;
+			goto out;
 		}
 
 		for (i = 0; i < SMIAPP_NVM_PAGE_SIZE; i++) {

From c8306ac3b5fd3b60ccc06f2c52645ad9f58b5260 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 16 May 2018 20:07:18 +0200
Subject: [PATCH 191/519] ALSA: usb-audio: Apply rate limit to warning messages
 in URB complete callback

[ Upstream commit 377a879d9832f4ba69bd6a1fc996bb4181b1e504 ]

retire_capture_urb() may print warning messages when the given URB
doesn't align, and this may flood the system log easily.
Put the rate limit to the message for avoiding it.

Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1093485
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/pcm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 8e8db4ddf365..a9079654107c 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -1300,7 +1300,7 @@ static void retire_capture_urb(struct snd_usb_substream *subs,
 		if (bytes % (runtime->sample_bits >> 3) != 0) {
 			int oldbytes = bytes;
 			bytes = frames * stride;
-			dev_warn(&subs->dev->dev,
+			dev_warn_ratelimited(&subs->dev->dev,
 				 "Corrected urb data len. %d->%d\n",
 							oldbytes, bytes);
 		}

From f12b01b896a506a5ad44120b2315f3018c2ea1e0 Mon Sep 17 00:00:00 2001
From: Terry Junge <terry.junge@plantronics.com>
Date: Mon, 30 Apr 2018 13:32:46 -0700
Subject: [PATCH 192/519] HID: hid-plantronics: Re-resend Update to map button
 for PTT products

[ Upstream commit 37e376df5f4993677c33968a0c19b0c5acbf1108 ]

Add a mapping for Push-To-Talk joystick trigger button.

Tested on ChromeBox/ChromeBook with various Plantronics devices.

Signed-off-by: Terry Junge <terry.junge@plantronics.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-plantronics.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-plantronics.c b/drivers/hid/hid-plantronics.c
index febb21ee190e..584b10d3fc3d 100644
--- a/drivers/hid/hid-plantronics.c
+++ b/drivers/hid/hid-plantronics.c
@@ -2,7 +2,7 @@
  *  Plantronics USB HID Driver
  *
  *  Copyright (c) 2014 JD Cole <jd.cole@plantronics.com>
- *  Copyright (c) 2015 Terry Junge <terry.junge@plantronics.com>
+ *  Copyright (c) 2015-2018 Terry Junge <terry.junge@plantronics.com>
  */
 
 /*
@@ -48,6 +48,10 @@ static int plantronics_input_mapping(struct hid_device *hdev,
 	unsigned short mapped_key;
 	unsigned long plt_type = (unsigned long)hid_get_drvdata(hdev);
 
+	/* special case for PTT products */
+	if (field->application == HID_GD_JOYSTICK)
+		goto defaulted;
+
 	/* handle volume up/down mapping */
 	/* non-standard types or multi-HID interfaces - plt_type is PID */
 	if (!(plt_type & HID_USAGE_PAGE)) {

From 1f5e33948005cd1b720fd58717bb971109432875 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Tue, 24 Apr 2018 15:15:13 +0200
Subject: [PATCH 193/519] drm/radeon: fix mode_valid's return type

[ Upstream commit 7a47f20eb1fb8fa8d7a8fe3a4fd8c721f04c2174 ]

The method struct drm_connector_helper_funcs::mode_valid is defined
as returning an 'enum drm_mode_status' but the driver implementation
for this method uses an 'int' for it.

Fix this by using 'enum drm_mode_status' in the driver too.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 1a2a7365d0b5..c6bf378534f8 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -844,7 +844,7 @@ static int radeon_lvds_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int radeon_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_encoder *encoder = radeon_best_single_encoder(connector);
@@ -993,7 +993,7 @@ static int radeon_vga_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int radeon_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1136,7 +1136,7 @@ static int radeon_tv_get_modes(struct drm_connector *connector)
 	return 1;
 }
 
-static int radeon_tv_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
 	if ((mode->hdisplay > 1024) || (mode->vdisplay > 768))
@@ -1477,7 +1477,7 @@ static void radeon_dvi_force(struct drm_connector *connector)
 		radeon_connector->use_digital = true;
 }
 
-static int radeon_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1778,7 +1778,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 	return ret;
 }
 
-static int radeon_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;

From 6a409759c64a8eeac8ccb23dcbc3c89241f2455c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Thu, 10 May 2018 23:59:19 +0200
Subject: [PATCH 194/519] powerpc/embedded6xx/hlwd-pic: Prevent interrupts from
 being handled by Starlet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9dcb3df4281876731e4e8bff7940514d72375154 ]

The interrupt controller inside the Wii's Hollywood chip is connected to
two masters, the "Broadway" PowerPC and the "Starlet" ARM926, each with
their own interrupt status and mask registers.

When booting the Wii with mini[1], interrupts from the SD card
controller (IRQ 7) are handled by the ARM, because mini provides SD
access over IPC. Linux however can't currently use or disable this IPC
service, so both sides try to handle IRQ 7 without coordination.

Let's instead make sure that all interrupts that are unmasked on the PPC
side are masked on the ARM side; this will also make sure that Linux can
properly talk to the SD card controller (and potentially other devices).

If access to a device through IPC is desired in the future, interrupts
from that device should not be handled by Linux directly.

[1]: https://github.com/lewurm/mini

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 9b7975706bfc..9485f1024d46 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -35,6 +35,8 @@
  */
 #define HW_BROADWAY_ICR		0x00
 #define HW_BROADWAY_IMR		0x04
+#define HW_STARLET_ICR		0x08
+#define HW_STARLET_IMR		0x0c
 
 
 /*
@@ -74,6 +76,9 @@ static void hlwd_pic_unmask(struct irq_data *d)
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
+
+	/* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */
+	clrbits32(io_base + HW_STARLET_IMR, 1 << irq);
 }
 
 
From 29f8cd10e537dc39254e4fcf36decf43529e6986 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Wed, 9 May 2018 12:12:15 -0700
Subject: [PATCH 195/519] HID: i2c-hid: check if device is there before really
 probing

[ Upstream commit b3a81b6c4fc6730ac49e20d789a93c0faabafc98 ]

On many Chromebooks touch devices are multi-sourced; the components are
electrically compatible and one can be freely swapped for another without
changing the OS image or firmware.

To avoid bunch of scary messages when device is not actually present in the
system let's try testing basic communication with it and if there is no
response terminate probe early with -ENXIO.

Signed-off-by: Dmitry Torokhov <dtor@chromium.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/i2c-hid/i2c-hid.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index a5fed668fde1..4248d253c32a 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1017,6 +1017,14 @@ static int i2c_hid_probe(struct i2c_client *client,
 	pm_runtime_set_active(&client->dev);
 	pm_runtime_enable(&client->dev);
 
+	/* Make sure there is something at this address */
+	ret = i2c_smbus_read_byte(client);
+	if (ret < 0) {
+		dev_dbg(&client->dev, "nothing at this address: %d\n", ret);
+		ret = -ENXIO;
+		goto err_pm;
+	}
+
 	ret = i2c_hid_fetch_hid_descriptor(ihid);
 	if (ret < 0)
 		goto err_pm;

From 6dbfa9b5ae65063cd61dc7fa11332e00bb794d8b Mon Sep 17 00:00:00 2001
From: DaeRyong Jeong <threeearcat@gmail.com>
Date: Tue, 1 May 2018 00:27:04 +0900
Subject: [PATCH 196/519] tty: Fix data race in
 tty_insert_flip_string_fixed_flag

[ Upstream commit b6da31b2c07c46f2dcad1d86caa835227a16d9ff ]

Unlike normal serials, in pty layer, there is no guarantee that multiple
threads don't insert input characters at the same time. If it is happened,
tty_insert_flip_string_fixed_flag can be executed concurrently. This can
lead slab out-of-bounds write in tty_insert_flip_string_fixed_flag.

Call sequences are as follows.
CPU0                                    CPU1
n_tty_ioctl_helper                      n_tty_ioctl_helper
__start_tty                             tty_send_xchar
tty_wakeup                              pty_write
n_hdlc_tty_wakeup                       tty_insert_flip_string
n_hdlc_send_frames                      tty_insert_flip_string_fixed_flag
pty_write
tty_insert_flip_string
tty_insert_flip_string_fixed_flag

To fix the race, acquire port->lock in pty_write() before it inserts input
characters to tty buffer. It prevents multiple threads from inserting
input characters concurrently.

The crash log is as follows:
BUG: KASAN: slab-out-of-bounds in tty_insert_flip_string_fixed_flag+0xb5/
0x130 drivers/tty/tty_buffer.c:316 at addr ffff880114fcc121
Write of size 1792 by task syz-executor0/30017
CPU: 1 PID: 30017 Comm: syz-executor0 Not tainted 4.8.0 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
 0000000000000000 ffff88011638f888 ffffffff81694cc3 ffff88007d802140
 ffff880114fcb300 ffff880114fcc300 ffff880114fcb300 ffff88011638f8b0
 ffffffff8130075c ffff88011638f940 ffff88007d802140 ffff880194fcc121
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0xb3/0x110 lib/dump_stack.c:51
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:156
 print_address_description mm/kasan/report.c:194 [inline]
 kasan_report_error+0x1f7/0x4e0 mm/kasan/report.c:283
 kasan_report+0x36/0x40 mm/kasan/report.c:303
 check_memory_region_inline mm/kasan/kasan.c:292 [inline]
 check_memory_region+0x13e/0x1a0 mm/kasan/kasan.c:299
 memcpy+0x37/0x50 mm/kasan/kasan.c:335
 tty_insert_flip_string_fixed_flag+0xb5/0x130 drivers/tty/tty_buffer.c:316
 tty_insert_flip_string include/linux/tty_flip.h:35 [inline]
 pty_write+0x7f/0xc0 drivers/tty/pty.c:115
 n_hdlc_send_frames+0x1d4/0x3b0 drivers/tty/n_hdlc.c:419
 n_hdlc_tty_wakeup+0x73/0xa0 drivers/tty/n_hdlc.c:496
 tty_wakeup+0x92/0xb0 drivers/tty/tty_io.c:601
 __start_tty.part.26+0x66/0x70 drivers/tty/tty_io.c:1018
 __start_tty+0x34/0x40 drivers/tty/tty_io.c:1013
 n_tty_ioctl_helper+0x146/0x1e0 drivers/tty/tty_ioctl.c:1138
 n_hdlc_tty_ioctl+0xb3/0x2b0 drivers/tty/n_hdlc.c:794
 tty_ioctl+0xa85/0x16d0 drivers/tty/tty_io.c:2992
 vfs_ioctl fs/ioctl.c:43 [inline]
 do_vfs_ioctl+0x13e/0xba0 fs/ioctl.c:679
 SYSC_ioctl fs/ioctl.c:694 [inline]
 SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685
 entry_SYSCALL_64_fastpath+0x1f/0xbd

Signed-off-by: DaeRyong Jeong <threeearcat@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 96aa0ad32497..c8a2e5b0eff7 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -106,16 +106,19 @@ static void pty_unthrottle(struct tty_struct *tty)
 static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
 {
 	struct tty_struct *to = tty->link;
+	unsigned long flags;
 
 	if (tty->stopped)
 		return 0;
 
 	if (c > 0) {
+		spin_lock_irqsave(&to->port->lock, flags);
 		/* Stuff the data into the input queue of the other end */
 		c = tty_insert_flip_string(to->port, buf, c);
 		/* And shovel */
 		if (c)
 			tty_flip_buffer_push(to->port);
+		spin_unlock_irqrestore(&to->port->lock, flags);
 	}
 	return c;
 }

From 7a9a331f0a5ca9434b3284a9a081c491b9075615 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 May 2018 13:14:33 +0100
Subject: [PATCH 197/519] dma-iommu: Fix compilation when !CONFIG_IOMMU_DMA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 8a22a3e1e768c309b718f99bd86f9f25a453e0dc ]

Inclusion of include/dma-iommu.h when CONFIG_IOMMU_DMA is not selected
results in the following splat:

In file included from drivers/irqchip/irq-gic-v3-mbi.c:20:0:
./include/linux/dma-iommu.h:95:69: error: unknown type name ‘dma_addr_t’
 static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
                                                                     ^~~~~~~~~~
./include/linux/dma-iommu.h:108:74: warning: ‘struct list_head’ declared inside parameter list will not be visible outside of this definition or declaration
 static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
                                                                          ^~~~~~~~~
scripts/Makefile.build:312: recipe for target 'drivers/irqchip/irq-gic-v3-mbi.o' failed

Fix it by including linux/types.h.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lkml.kernel.org/r/20180508121438.11301-5-marc.zyngier@arm.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dma-iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index fc481037478a..19baa7f4f403 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -17,6 +17,7 @@
 #define __DMA_IOMMU_H
 
 #ifdef __KERNEL__
+#include <linux/types.h>
 #include <asm/errno.h>
 
 #ifdef CONFIG_IOMMU_DMA

From 09b8746fd4fb430c2dfcd257b8649bf6754df979 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Tue, 12 Jul 2016 07:21:46 -0400
Subject: [PATCH 198/519] media: rcar_jpu: Add missing clk_disable_unprepare()
 on error in jpu_open()

[ Upstream commit 43d0d3c52787df0221d1c52494daabd824fe84f1 ]

Add the missing clk_disable_unprepare() before return from
jpu_open() in the software reset error handling case.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Mikhail Ulyanov <mikhail.ulyanov@cogentembedded.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/rcar_jpu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c
index f8e3e83c52a2..20de5e9fc217 100644
--- a/drivers/media/platform/rcar_jpu.c
+++ b/drivers/media/platform/rcar_jpu.c
@@ -1278,7 +1278,7 @@ static int jpu_open(struct file *file)
 		/* ...issue software reset */
 		ret = jpu_reset(jpu);
 		if (ret)
-			goto device_prepare_rollback;
+			goto jpu_reset_rollback;
 	}
 
 	jpu->ref_count++;
@@ -1286,6 +1286,8 @@ static int jpu_open(struct file *file)
 	mutex_unlock(&jpu->mutex);
 	return 0;
 
+jpu_reset_rollback:
+	clk_disable_unprepare(jpu->clk);
 device_prepare_rollback:
 	mutex_unlock(&jpu->mutex);
 v4l_prepare_rollback:

From cdebef38f64805a1802452acadd33e690bd7c503 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 9 May 2018 09:28:12 +0900
Subject: [PATCH 199/519] libata: Fix command retry decision

[ Upstream commit 804689ad2d9b66d0d3920b48cf05881049d44589 ]

For failed commands with valid sense data (e.g. NCQ commands),
scsi_check_sense() is used in ata_analyze_tf() to determine if the
command can be retried. In such case, rely on this decision and ignore
the command error mask based decision done in ata_worth_retry().

This fixes useless retries of commands such as unaligned writes on zoned
disks (TYPE_ZAC).

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-eh.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 75cced210b2a..7db76b5c7ada 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2198,12 +2198,16 @@ static void ata_eh_link_autopsy(struct ata_link *link)
 		if (qc->err_mask & ~AC_ERR_OTHER)
 			qc->err_mask &= ~AC_ERR_OTHER;
 
-		/* SENSE_VALID trumps dev/unknown error and revalidation */
+		/*
+		 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
+		 * layers will determine whether the command is worth retrying
+		 * based on the sense data and device class/type. Otherwise,
+		 * determine directly if the command is worth retrying using its
+		 * error mask and flags.
+		 */
 		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
-
-		/* determine whether the command is worth retrying */
-		if (ata_eh_worth_retry(qc))
+		else if (ata_eh_worth_retry(qc))
 			qc->flags |= ATA_QCFLAG_RETRY;
 
 		/* accumulate error info */

From 1fd65d6a00853204652489fbe37278d3771638f8 Mon Sep 17 00:00:00 2001
From: Brad Love <brad@nextdimension.cc>
Date: Fri, 4 May 2018 17:53:35 -0400
Subject: [PATCH 200/519] media: saa7164: Fix driver name in debug output

[ Upstream commit 0cc4655cb57af0b7e105d075c4f83f8046efafe7 ]

This issue was reported by a user who downloaded a corrupt saa7164
firmware, then went looking for a valid xc5000 firmware to fix the
error displayed...but the device in question has no xc5000, thus after
much effort, the wild goose chase eventually led to a support call.

The xc5000 has nothing to do with saa7164 (as far as I can tell),
so replace the string with saa7164 as well as give a meaningful
hint on the firmware mismatch.

Signed-off-by: Brad Love <brad@nextdimension.cc>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/saa7164/saa7164-fw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/saa7164/saa7164-fw.c b/drivers/media/pci/saa7164/saa7164-fw.c
index 269e0782c7b6..93d53195e8ca 100644
--- a/drivers/media/pci/saa7164/saa7164-fw.c
+++ b/drivers/media/pci/saa7164/saa7164-fw.c
@@ -430,7 +430,8 @@ int saa7164_downloadfirmware(struct saa7164_dev *dev)
 			__func__, fw->size);
 
 		if (fw->size != fwlength) {
-			printk(KERN_ERR "xc5000: firmware incorrect size\n");
+			printk(KERN_ERR "saa7164: firmware incorrect size %zu != %u\n",
+				fw->size, fwlength);
 			ret = -ENOMEM;
 			goto out;
 		}

From b43d01323cf16432db5303dad3e5dfe0271c9240 Mon Sep 17 00:00:00 2001
From: Jane Wan <Jane.Wan@nokia.com>
Date: Tue, 8 May 2018 14:19:53 -0700
Subject: [PATCH 201/519] mtd: rawnand: fsl_ifc: fix FSL NAND driver to read
 all ONFI parameter pages

[ Upstream commit a75bbe71a27875fdc61cde1af6d799037cef6bed ]

Per ONFI specification (Rev. 4.0), if the CRC of the first parameter page
read is not valid, the host should read redundant parameter page copies.
Fix FSL NAND driver to read the two redundant copies which are mandatory
in the specification.

Signed-off-by: Jane Wan <Jane.Wan@nokia.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/fsl_ifc_nand.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 5e3fa5861039..2c0bbaed3609 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -449,9 +449,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 
 	case NAND_CMD_READID:
 	case NAND_CMD_PARAM: {
+		/*
+		 * For READID, read 8 bytes that are currently used.
+		 * For PARAM, read all 3 copies of 256-bytes pages.
+		 */
+		int len = 8;
 		int timing = IFC_FIR_OP_RB;
-		if (command == NAND_CMD_PARAM)
+		if (command == NAND_CMD_PARAM) {
 			timing = IFC_FIR_OP_RBCD;
+			len = 256 * 3;
+		}
 
 		ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
 			  (IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
@@ -461,12 +468,8 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 			  &ifc->ifc_nand.nand_fcr0);
 		ifc_out32(column, &ifc->ifc_nand.row3);
 
-		/*
-		 * although currently it's 8 bytes for READID, we always read
-		 * the maximum 256 bytes(for PARAM)
-		 */
-		ifc_out32(256, &ifc->ifc_nand.nand_fbcr);
-		ifc_nand_ctrl->read_bytes = 256;
+		ifc_out32(len, &ifc->ifc_nand.nand_fbcr);
+		ifc_nand_ctrl->read_bytes = len;
 
 		set_addr(mtd, 0, 0, 0);
 		fsl_ifc_run_command(mtd);

From e97509a1b36ccb5e0b4743b4b5168c8bdb9df52c Mon Sep 17 00:00:00 2001
From: Sean Lanigan <sean@lano.id.au>
Date: Fri, 4 May 2018 16:48:23 +1000
Subject: [PATCH 202/519] brcmfmac: Add support for bcm43364 wireless chipset

[ Upstream commit 9c4a121e82634aa000a702c98cd6f05b27d6e186 ]

Add support for the BCM43364 chipset via an SDIO interface, as used in
e.g. the Murata 1FX module.

The BCM43364 uses the same firmware as the BCM43430 (which is already
included), the only difference is the omission of Bluetooth.

However, the SDIO_ID for the BCM43364 is 02D0:A9A4, giving it a MODALIAS
of sdio:c00v02D0dA9A4, which doesn't get recognised and hence doesn't
load the brcmfmac module. Adding the 'A9A4' ID in the appropriate place
triggers the brcmfmac driver to load, and then correctly use the
firmware file 'brcmfmac43430-sdio.bin'.

Signed-off-by: Sean Lanigan <sean@lano.id.au>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 1 +
 include/linux/mmc/sdio_ids.h                     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
index 59cef6c69fe8..91da67657f81 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
@@ -1109,6 +1109,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43340),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
+ 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345),
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 83430f2ea757..e0325706b76d 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -33,6 +33,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43341		0xa94d
 #define SDIO_DEVICE_ID_BROADCOM_4335_4339	0x4335
 #define SDIO_DEVICE_ID_BROADCOM_43362		0xa962
+#define SDIO_DEVICE_ID_BROADCOM_43364		0xa9a4
 #define SDIO_DEVICE_ID_BROADCOM_43430		0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_4345		0x4345
 #define SDIO_DEVICE_ID_BROADCOM_4354		0x4354

From 36ba2c8f617686ccfe1b0a9337ed3f662ce891fb Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Tue, 8 May 2018 10:18:39 +0200
Subject: [PATCH 203/519] s390/cpum_sf: Add data entry sizes to sampling
 trailer entry

[ Upstream commit 77715b7ddb446bd39a06f3376e85f4bb95b29bb8 ]

The CPU Measurement sampling facility creates a trailer entry for each
Sample-Data-Block of stored samples. The trailer entry contains the sizes
(in bytes) of the stored sampling types:
 - basic-sampling data entry size
 - diagnostic-sampling data entry size
Both sizes are 2 bytes long.

This patch changes the trailer entry definition to reflect this.

Fixes: fcc77f507333 ("s390/cpum_sf: Atomically reset trailer entry fields of sample-data-blocks")
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/cpu_mf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 9dd04b9e9782..80c0b9c9c3b2 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -129,7 +129,9 @@ struct hws_trailer_entry {
 			unsigned int f:1;	/* 0 - Block Full Indicator   */
 			unsigned int a:1;	/* 1 - Alert request control  */
 			unsigned int t:1;	/* 2 - Timestamp format	      */
-			unsigned long long:61;	/* 3 - 63: Reserved	      */
+			unsigned int :29;	/* 3 - 31: Reserved	      */
+			unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+			unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
 		};
 		unsigned long long flags;	/* 0 - 63: All indicators     */
 	};

From c4837ace416459c3f7394d73137ed0bb02869939 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Tue, 8 May 2018 07:53:39 +0200
Subject: [PATCH 204/519] perf: fix invalid bit in diagnostic entry

[ Upstream commit 3c0a83b14ea71fef5ccc93a3bd2de5f892be3194 ]

The s390 CPU measurement facility sampling mode supports basic entries
and diagnostic entries. Each entry has a valid bit to indicate the
status of the entry as valid or invalid.

This bit is bit 31 in the diagnostic entry, but the bit mask definition
refers to bit 30.

Fix this by making the reserved field one bit larger.

Fixes: 7e75fc3ff4cf ("s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function")
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/cpu_mf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 80c0b9c9c3b2..b2f8c52b3840 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -113,7 +113,7 @@ struct hws_basic_entry {
 
 struct hws_diag_entry {
 	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
-	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int R:15;	    /* 16-19 and 20-30 reserved		 */
 	unsigned int I:1;	    /* 31 entry valid or invalid	 */
 	u8	     data[];	    /* Machine-dependent sample data	 */
 } __packed;

From 092b0288f150e17ed626079685a04318abbfbd81 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Mon, 7 May 2018 19:46:43 -0500
Subject: [PATCH 205/519] scsi: 3w-9xxx: fix a missing-check bug

[ Upstream commit c9318a3e0218bc9dacc25be46b9eec363259536f ]

In twa_chrdev_ioctl(), the ioctl driver command is firstly copied from
the userspace pointer 'argp' and saved to the kernel object
'driver_command'.  Then a security check is performed on the data buffer
size indicated by 'driver_command', which is
'driver_command.buffer_length'. If the security check is passed, the
entire ioctl command is copied again from the 'argp' pointer and saved
to the kernel object 'tw_ioctl'. Then, various operations are performed
on 'tw_ioctl' according to the 'cmd'. Given that the 'argp' pointer
resides in userspace, a malicious userspace process can race to change
the buffer size between the two copies. This way, the user can bypass
the security check and inject invalid data buffer size. This can cause
potential security issues in the following execution.

This patch checks for capable(CAP_SYS_ADMIN) in twa_chrdev_open()t o
avoid the above issues.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Adam Radford <aradford@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/3w-9xxx.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index a56a7b243e91..5466246c69b4 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -889,6 +889,11 @@ static int twa_chrdev_open(struct inode *inode, struct file *file)
 	unsigned int minor_number;
 	int retval = TW_IOCTL_ERROR_OS_ENODEV;
 
+	if (!capable(CAP_SYS_ADMIN)) {
+		retval = -EACCES;
+		goto out;
+	}
+
 	minor_number = iminor(inode);
 	if (minor_number >= twa_device_extension_count)
 		goto out;

From 2c29ed33045b82322518452b20b9b2ff9ebec2f7 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Mon, 7 May 2018 19:54:01 -0500
Subject: [PATCH 206/519] scsi: 3w-xxxx: fix a missing-check bug

[ Upstream commit 9899e4d3523faaef17c67141aa80ff2088f17871 ]

In tw_chrdev_ioctl(), the length of the data buffer is firstly copied
from the userspace pointer 'argp' and saved to the kernel object
'data_buffer_length'. Then a security check is performed on it to make
sure that the length is not more than 'TW_MAX_IOCTL_SECTORS *
512'. Otherwise, an error code -EINVAL is returned. If the security
check is passed, the entire ioctl command is copied again from the
'argp' pointer and saved to the kernel object 'tw_ioctl'. Then, various
operations are performed on 'tw_ioctl' according to the 'cmd'. Given
that the 'argp' pointer resides in userspace, a malicious userspace
process can race to change the buffer length between the two
copies. This way, the user can bypass the security check and inject
invalid data buffer length. This can cause potential security issues in
the following execution.

This patch checks for capable(CAP_SYS_ADMIN) in tw_chrdev_open() to
avoid the above issues.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Adam Radford <aradford@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/3w-xxxx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 2940bd769936..14af38036287 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1034,6 +1034,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
 
 	dprintk(KERN_WARNING "3w-xxxx: tw_ioctl_open()\n");
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	minor_number = iminor(inode);
 	if (minor_number >= tw_device_extension_count)
 		return -ENODEV;

From 4005ecaf46f28f89bf58ec28f7f7997a6fe15ecd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 3 May 2018 13:54:32 +0300
Subject: [PATCH 207/519] scsi: megaraid: silence a static checker bug

[ Upstream commit 27e833dabab74ee665e487e291c9afc6d71effba ]

If we had more than 32 megaraid cards then it would cause memory
corruption.  That's not likely, of course, but it's handy to enforce it
and make the static checker happy.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 9d05302a3bcd..19bffe0b2cc0 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -4197,6 +4197,9 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	int irq, i, j;
 	int error = -ENODEV;
 
+	if (hba_count >= MAX_CONTROLLERS)
+		goto out;
+
 	if (pci_enable_device(pdev))
 		goto out;
 	pci_set_master(pdev);

From b9229445a4ae6237150d86a095586c094a1b967c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 26 Apr 2018 13:51:16 +0200
Subject: [PATCH 208/519] thermal: exynos: fix setting rising_threshold for
 Exynos5433

[ Upstream commit 8bfc218d0ebbabcba8ed2b8ec1831e0cf1f71629 ]

Add missing clearing of the previous value when setting rising
temperature threshold.

Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thermal/samsung/exynos_tmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index fa61eff88496..16d45a25284f 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -585,6 +585,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev)
 		threshold_code = temp_to_code(data, temp);
 
 		rising_threshold = readl(data->base + rising_reg_offset);
+		rising_threshold &= ~(0xff << j * 8);
 		rising_threshold |= (threshold_code << j * 8);
 		writel(rising_threshold, data->base + rising_reg_offset);
 

From b23dab51e987787e358397b24831505668625b8a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 3 May 2018 18:37:17 -0700
Subject: [PATCH 209/519] bpf: fix references to free_bpf_prog_info() in
 comments

[ Upstream commit ab7f5bf0928be2f148d000a6eaa6c0a36e74750e ]

Comments in the verifier refer to free_bpf_prog_info() which
seems to have never existed in tree.  Replace it with
free_used_maps().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 79e3c21a35d0..35dfa9e9d69e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2101,7 +2101,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 			/* hold the map. If the program is rejected by verifier,
 			 * the map will be released by release_maps() or it
 			 * will be used by the valid program until it's unloaded
-			 * and all maps are released in free_bpf_prog_info()
+			 * and all maps are released in free_used_maps()
 			 */
 			map = bpf_map_inc(map, false);
 			if (IS_ERR(map)) {
@@ -2487,7 +2487,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		vfree(log_buf);
 	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
-		 * them now. Otherwise free_bpf_prog_info() will release them.
+		 * them now. Otherwise free_used_maps() will release them.
 		 */
 		release_maps(env);
 	*prog = env->prog;

From 3d4a1e1cf348b5afab54d5903769f8eb5fe8e830 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 20 Apr 2018 08:32:16 -0400
Subject: [PATCH 210/519] media: siano: get rid of __le32/__le16 cast warnings

[ Upstream commit e1b7f11b37def5f3021c06e8c2b4953e099357aa ]

Those are all false-positives that appear with smatch when building for
arm:

  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:38:36: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:47:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:67:35: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:84:44: warning: cast to restricted __le32
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:98:26: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:99:28: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16
  drivers/media/common/siano/smsendian.c:100:27: warning: cast to restricted __le16

Get rid of them by adding explicit forced casts.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/common/siano/smsendian.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/media/common/siano/smsendian.c b/drivers/media/common/siano/smsendian.c
index bfe831c10b1c..b95a631f23f9 100644
--- a/drivers/media/common/siano/smsendian.c
+++ b/drivers/media/common/siano/smsendian.c
@@ -35,7 +35,7 @@ void smsendian_handle_tx_message(void *buffer)
 	switch (msg->x_msg_header.msg_type) {
 	case MSG_SMS_DATA_DOWNLOAD_REQ:
 	{
-		msg->msg_data[0] = le32_to_cpu(msg->msg_data[0]);
+		msg->msg_data[0] = le32_to_cpu((__force __le32)(msg->msg_data[0]));
 		break;
 	}
 
@@ -44,7 +44,7 @@ void smsendian_handle_tx_message(void *buffer)
 				sizeof(struct sms_msg_hdr))/4;
 
 		for (i = 0; i < msg_words; i++)
-			msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]);
+			msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]);
 
 		break;
 	}
@@ -64,7 +64,7 @@ void smsendian_handle_rx_message(void *buffer)
 	{
 		struct sms_version_res *ver =
 			(struct sms_version_res *) msg;
-		ver->chip_model = le16_to_cpu(ver->chip_model);
+		ver->chip_model = le16_to_cpu((__force __le16)ver->chip_model);
 		break;
 	}
 
@@ -81,7 +81,7 @@ void smsendian_handle_rx_message(void *buffer)
 				sizeof(struct sms_msg_hdr))/4;
 
 		for (i = 0; i < msg_words; i++)
-			msg->msg_data[i] = le32_to_cpu(msg->msg_data[i]);
+			msg->msg_data[i] = le32_to_cpu((__force __le32)msg->msg_data[i]);
 
 		break;
 	}
@@ -95,9 +95,9 @@ void smsendian_handle_message_header(void *msg)
 #ifdef __BIG_ENDIAN
 	struct sms_msg_hdr *phdr = (struct sms_msg_hdr *)msg;
 
-	phdr->msg_type = le16_to_cpu(phdr->msg_type);
-	phdr->msg_length = le16_to_cpu(phdr->msg_length);
-	phdr->msg_flags = le16_to_cpu(phdr->msg_flags);
+	phdr->msg_type = le16_to_cpu((__force __le16)phdr->msg_type);
+	phdr->msg_length = le16_to_cpu((__force __le16)phdr->msg_length);
+	phdr->msg_flags = le16_to_cpu((__force __le16)phdr->msg_flags);
 #endif /* __BIG_ENDIAN */
 }
 EXPORT_SYMBOL_GPL(smsendian_handle_message_header);

From f1a64c117f1363f17cfc7e5bd410ec6222031501 Mon Sep 17 00:00:00 2001
From: Satendra Singh Thakur <satendra.t@samsung.com>
Date: Thu, 3 May 2018 11:19:32 +0530
Subject: [PATCH 211/519] drm/atomic: Handling the case when setting old crtc
 for plane

[ Upstream commit fc2a69f3903dfd97cd47f593e642b47918c949df ]

In the func drm_atomic_set_crtc_for_plane, with the current code,
if crtc of the plane_state and crtc passed as argument to the func
are same, entire func will executed in vein.
It will get state of crtc and clear and set the bits in plane_mask.
All these steps are not required for same old crtc.
Ideally, we should do nothing in this case, this patch handles the same,
and causes the program to return without doing anything in such scenario.

Signed-off-by: Satendra Singh Thakur <satendra.t@samsung.com>
Cc: Madhur Verma <madhur.verma@samsung.com>
Cc: Hemanshu Srivastava <hemanshu.s@samsung.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1525326572-25854-1-git-send-email-satendra.t@samsung.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_atomic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 50d74e5ce41b..355ad1b97df6 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -960,7 +960,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state,
 {
 	struct drm_plane *plane = plane_state->plane;
 	struct drm_crtc_state *crtc_state;
-
+	/* Nothing to do for same crtc*/
+	if (plane_state->crtc == crtc)
+		return 0;
 	if (plane_state->crtc) {
 		crtc_state = drm_atomic_get_crtc_state(plane_state->state,
 						       plane_state->crtc);

From 0743d20878ddec1f959d0fbb419cb4dc2ebfc374 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Wed, 2 May 2018 22:48:16 +0900
Subject: [PATCH 212/519] ALSA: hda/ca0132: fix build failure when a local
 macro is defined

[ Upstream commit 8e142e9e628975b0dddd05cf1b095331dff6e2de ]

DECLARE_TLV_DB_SCALE (alias of SNDRV_CTL_TLVD_DECLARE_DB_SCALE) is used but
tlv.h is not included. This causes build failure when local macro is
defined by comment-out.

This commit fixes the bug. At the same time, the alias macro is replaced
with a destination macro added at a commit 46e860f76804 ("ALSA: rename
TLV-related macros so that they're friendly to user applications")

Reported-by: Connor McAdams <conmanx360@gmail.com>
Fixes: 44f0c9782cc6 ('ALSA: hda/ca0132: Add tuning controls')
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_ca0132.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 29e1ce2263bc..c55c0131be0a 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -38,6 +38,10 @@
 /* Enable this to see controls for tuning purpose. */
 /*#define ENABLE_TUNING_CONTROLS*/
 
+#ifdef ENABLE_TUNING_CONTROLS
+#include <sound/tlv.h>
+#endif
+
 #define FLOAT_ZERO	0x00000000
 #define FLOAT_ONE	0x3f800000
 #define FLOAT_TWO	0x40000000
@@ -3067,8 +3071,8 @@ static int equalizer_ctl_put(struct snd_kcontrol *kcontrol,
 	return 1;
 }
 
-static const DECLARE_TLV_DB_SCALE(voice_focus_db_scale, 2000, 100, 0);
-static const DECLARE_TLV_DB_SCALE(eq_db_scale, -2400, 100, 0);
+static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(voice_focus_db_scale, 2000, 100, 0);
+static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(eq_db_scale, -2400, 100, 0);
 
 static int add_tuning_control(struct hda_codec *codec,
 				hda_nid_t pnid, hda_nid_t nid,

From 4ea02c98bf3bb86356b460fe42c5c86f932fc3ce Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 9 Apr 2018 22:28:27 +0300
Subject: [PATCH 213/519] memory: tegra: Do not handle spurious interrupts

[ Upstream commit bf3fbdfbec947cdd04b2f2c4bce11534c8786eee ]

The ISR reads interrupts-enable mask, but doesn't utilize it. Apply the
mask to the interrupt status and don't handle interrupts that MC driver
haven't asked for. Kernel would disable spurious MC IRQ and report the
error. This would happen only in a case of a very severe bug.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/memory/tegra/mc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index a1ae0cc2b86d..13dcee4ed0cf 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -252,8 +252,11 @@ static irqreturn_t tegra_mc_irq(int irq, void *data)
 	unsigned int bit;
 
 	/* mask all interrupts to avoid flooding */
-	status = mc_readl(mc, MC_INTSTATUS);
 	mask = mc_readl(mc, MC_INTMASK);
+	status = mc_readl(mc, MC_INTSTATUS) & mask;
+
+	if (!status)
+		return IRQ_NONE;
 
 	for_each_set_bit(bit, &status, 32) {
 		const char *error = status_names[bit] ?: "unknown";

From e55c920c902705ac7e8d4ee565192fbb3dd251ae Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 9 Apr 2018 22:28:29 +0300
Subject: [PATCH 214/519] memory: tegra: Apply interrupts mask per SoC

[ Upstream commit 1c74d5c0de0c2cc29fef97a19251da2ad6f579bd ]

Currently we are enabling handling of interrupts specific to Tegra124+
which happen to overlap with previous generations. Let's specify
interrupts mask per SoC generation for consistency and in a preparation
of squashing of Tegra20 driver into the common one that will enable
handling of GART faults which may be undesirable by newer generations.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/memory/tegra/mc.c       | 21 +++------------------
 drivers/memory/tegra/mc.h       |  9 +++++++++
 drivers/memory/tegra/tegra114.c |  2 ++
 drivers/memory/tegra/tegra124.c |  6 ++++++
 drivers/memory/tegra/tegra210.c |  3 +++
 drivers/memory/tegra/tegra30.c  |  2 ++
 include/soc/tegra/mc.h          |  2 ++
 7 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 13dcee4ed0cf..6ab481ee8ece 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -20,14 +20,6 @@
 #include "mc.h"
 
 #define MC_INTSTATUS 0x000
-#define  MC_INT_DECERR_MTS (1 << 16)
-#define  MC_INT_SECERR_SEC (1 << 13)
-#define  MC_INT_DECERR_VPR (1 << 12)
-#define  MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
-#define  MC_INT_INVALID_SMMU_PAGE (1 << 10)
-#define  MC_INT_ARBITRATION_EMEM (1 << 9)
-#define  MC_INT_SECURITY_VIOLATION (1 << 8)
-#define  MC_INT_DECERR_EMEM (1 << 6)
 
 #define MC_INTMASK 0x004
 
@@ -248,13 +240,11 @@ static const char *const error_names[8] = {
 static irqreturn_t tegra_mc_irq(int irq, void *data)
 {
 	struct tegra_mc *mc = data;
-	unsigned long status, mask;
+	unsigned long status;
 	unsigned int bit;
 
 	/* mask all interrupts to avoid flooding */
-	mask = mc_readl(mc, MC_INTMASK);
-	status = mc_readl(mc, MC_INTSTATUS) & mask;
-
+	status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
 	if (!status)
 		return IRQ_NONE;
 
@@ -349,7 +339,6 @@ static int tegra_mc_probe(struct platform_device *pdev)
 	const struct of_device_id *match;
 	struct resource *res;
 	struct tegra_mc *mc;
-	u32 value;
 	int err;
 
 	match = of_match_node(tegra_mc_of_match, pdev->dev.of_node);
@@ -417,11 +406,7 @@ static int tegra_mc_probe(struct platform_device *pdev)
 
 	WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n");
 
-	value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
-		MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
-		MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM;
-
-	mc_writel(mc, value, MC_INTMASK);
+	mc_writel(mc, mc->soc->intmask, MC_INTMASK);
 
 	return 0;
 }
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index ddb16676c3af..24e020b4609b 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -14,6 +14,15 @@
 
 #include <soc/tegra/mc.h>
 
+#define MC_INT_DECERR_MTS (1 << 16)
+#define MC_INT_SECERR_SEC (1 << 13)
+#define MC_INT_DECERR_VPR (1 << 12)
+#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
+#define MC_INT_INVALID_SMMU_PAGE (1 << 10)
+#define MC_INT_ARBITRATION_EMEM (1 << 9)
+#define MC_INT_SECURITY_VIOLATION (1 << 8)
+#define MC_INT_DECERR_EMEM (1 << 6)
+
 static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
 {
 	return readl(mc->regs + offset);
diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c
index ba8fff3d66a6..6d2a5a849d92 100644
--- a/drivers/memory/tegra/tegra114.c
+++ b/drivers/memory/tegra/tegra114.c
@@ -930,4 +930,6 @@ const struct tegra_mc_soc tegra114_mc_soc = {
 	.atom_size = 32,
 	.client_id_mask = 0x7f,
 	.smmu = &tegra114_smmu_soc,
+	.intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
+		   MC_INT_DECERR_EMEM,
 };
diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c
index 21e7255e3d96..234e74f97a4b 100644
--- a/drivers/memory/tegra/tegra124.c
+++ b/drivers/memory/tegra/tegra124.c
@@ -1019,6 +1019,9 @@ const struct tegra_mc_soc tegra124_mc_soc = {
 	.smmu = &tegra124_smmu_soc,
 	.emem_regs = tegra124_mc_emem_regs,
 	.num_emem_regs = ARRAY_SIZE(tegra124_mc_emem_regs),
+	.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
 #endif /* CONFIG_ARCH_TEGRA_124_SOC */
 
@@ -1041,5 +1044,8 @@ const struct tegra_mc_soc tegra132_mc_soc = {
 	.atom_size = 32,
 	.client_id_mask = 0x7f,
 	.smmu = &tegra132_smmu_soc,
+	.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
 #endif /* CONFIG_ARCH_TEGRA_132_SOC */
diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c
index 5e144abe4c18..47c78a6d8f00 100644
--- a/drivers/memory/tegra/tegra210.c
+++ b/drivers/memory/tegra/tegra210.c
@@ -1077,4 +1077,7 @@ const struct tegra_mc_soc tegra210_mc_soc = {
 	.atom_size = 64,
 	.client_id_mask = 0xff,
 	.smmu = &tegra210_smmu_soc,
+	.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c
index b44737840e70..d0689428ea1a 100644
--- a/drivers/memory/tegra/tegra30.c
+++ b/drivers/memory/tegra/tegra30.c
@@ -952,4 +952,6 @@ const struct tegra_mc_soc tegra30_mc_soc = {
 	.atom_size = 16,
 	.client_id_mask = 0x7f,
 	.smmu = &tegra30_smmu_soc,
+	.intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
+		   MC_INT_DECERR_EMEM,
 };
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 44202ff897fd..f759e0918037 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -99,6 +99,8 @@ struct tegra_mc_soc {
 	u8 client_id_mask;
 
 	const struct tegra_smmu_soc *smmu;
+
+	u32 intmask;
 };
 
 struct tegra_mc {

From aa9190401a553e96edb4abe32b101d3e5c83f18f Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Tue, 24 Apr 2018 15:14:57 +0200
Subject: [PATCH 215/519] drm/gma500: fix psb_intel_lvds_mode_valid()'s return
 type

[ Upstream commit 2ea009095c6e7396915a1d0dd480c41f02985f79 ]

The method struct drm_connector_helper_funcs::mode_valid is defined
as returning an 'enum drm_mode_status' but the driver implementation
for this method, psb_intel_lvds_mode_valid(), uses an 'int' for it.

Fix this by using 'enum drm_mode_status' for psb_intel_lvds_mode_valid().

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20180424131458.2060-1-luc.vanoostenryck@gmail.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/gma500/psb_intel_drv.h  | 2 +-
 drivers/gpu/drm/gma500/psb_intel_lvds.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index 860dd2177ca1..283570080d47 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -252,7 +252,7 @@ extern int intelfb_remove(struct drm_device *dev,
 extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder,
 				      const struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode);
-extern int psb_intel_lvds_mode_valid(struct drm_connector *connector,
+extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector,
 				     struct drm_display_mode *mode);
 extern int psb_intel_lvds_set_property(struct drm_connector *connector,
 					struct drm_property *property,
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 61e3a097a478..ccd1b8bf0fd5 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -343,7 +343,7 @@ static void psb_intel_lvds_restore(struct drm_connector *connector)
 	}
 }
 
-int psb_intel_lvds_mode_valid(struct drm_connector *connector,
+enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	struct drm_psb_private *dev_priv = connector->dev->dev_private;

From 0eb135eea738adae5bbe3a93b806576e20cd66f6 Mon Sep 17 00:00:00 2001
From: Chris Novakovic <chris@chrisn.me.uk>
Date: Tue, 24 Apr 2018 03:56:37 +0100
Subject: [PATCH 216/519] ipconfig: Correctly initialise ic_nameservers

[ Upstream commit 300eec7c0a2495f771709c7642aa15f7cc148b83 ]

ic_nameservers, which stores the list of name servers discovered by
ipconfig, is initialised (i.e. has all of its elements set to NONE, or
0xffffffff) by ic_nameservers_predef() in the following scenarios:

 - before the "ip=" and "nfsaddrs=" kernel command line parameters are
   parsed (in ip_auto_config_setup());
 - before autoconfiguring via DHCP or BOOTP (in ic_bootp_init()), in
   order to clear any values that may have been set after parsing "ip="
   or "nfsaddrs=" and are no longer needed.

This means that ic_nameservers_predef() is not called when neither "ip="
nor "nfsaddrs=" is specified on the kernel command line. In this
scenario, every element in ic_nameservers remains set to 0x00000000,
which is indistinguishable from ANY and causes pnp_seq_show() to write
the following (bogus) information to /proc/net/pnp:

  #MANUAL
  nameserver 0.0.0.0
  nameserver 0.0.0.0
  nameserver 0.0.0.0

This is potentially problematic for systems that blindly link
/etc/resolv.conf to /proc/net/pnp.

Ensure that ic_nameservers is also initialised when neither "ip=" nor
"nfsaddrs=" are specified by calling ic_nameservers_predef() in
ip_auto_config(), but only when ip_auto_config_setup() was not called
earlier. This causes the following to be written to /proc/net/pnp, and
is consistent with what gets written when ipconfig is configured
manually but no name servers are specified on the kernel command line:

  #MANUAL

Signed-off-by: Chris Novakovic <chris@chrisn.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ipconfig.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 9d6b9c4c5f82..60f564db25a3 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -790,6 +790,11 @@ static void __init ic_bootp_init_ext(u8 *e)
  */
 static inline void __init ic_bootp_init(void)
 {
+	/* Re-initialise all name servers to NONE, in case any were set via the
+	 * "ip=" or "nfsaddrs=" kernel command line parameters: any IP addresses
+	 * specified there will already have been decoded but are no longer
+	 * needed
+	 */
 	ic_nameservers_predef();
 
 	dev_add_pack(&bootp_packet_type);
@@ -1423,6 +1428,13 @@ static int __init ip_auto_config(void)
 	int err;
 	unsigned int i;
 
+	/* Initialise all name servers to NONE (but only if the "ip=" or
+	 * "nfsaddrs=" kernel command line parameters weren't decoded, otherwise
+	 * we'll overwrite the IP addresses specified there)
+	 */
+	if (ic_set_manually == 0)
+		ic_nameservers_predef();
+
 #ifdef CONFIG_PROC_FS
 	proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
 #endif /* CONFIG_PROC_FS */
@@ -1640,6 +1652,7 @@ static int __init ip_auto_config_setup(char *addrs)
 		return 1;
 	}
 
+	/* Initialise all name servers to NONE */
 	ic_nameservers_predef();
 
 	/* Parse string for static IP assignment.  */

From 7315e0d38a29792cbb32b84e17e413412171e1e1 Mon Sep 17 00:00:00 2001
From: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Date: Wed, 11 Apr 2018 12:13:32 +0530
Subject: [PATCH 217/519] rsi: Fix 'invalid vdd' warning in mmc

[ Upstream commit 78e450719c702784e42af6da912d3692fd3da0cb ]

While performing cleanup, driver is messing with card->ocr
value by not masking rocr against ocr_avail. Below panic
is observed with some of the SDIO host controllers due to
this. Issue is resolved by reverting incorrect modifications
to vdd.

[  927.423821] mmc1: Invalid vdd 0x1f
[  927.423925] Modules linked in: rsi_sdio(+) cmac bnep arc4 rsi_91x
	       mac80211 cfg80211 btrsi rfcomm bluetooth ecdh_generic
[  927.424073] CPU: 0 PID: 1624 Comm: insmod Tainted: G		W        4.15.0-1000-caracalla #1
[  927.424075] Hardware name: Dell Inc. Edge Gateway	3003/      , BIOS 01.00.06 01/22/2018
[  927.424082] RIP: 0010:sdhci_set_power_noreg+0xdd/0x190[sdhci]
[  927.424085] RSP: 0018:ffffac3fc064b930 EFLAGS:  00010282
[  927.424107] Call Trace:
[  927.424118]  sdhci_set_power+0x5a/0x60 [sdhci]
[  927.424125]  sdhci_set_ios+0x360/0x3b0 [sdhci]
[  927.424133]  mmc_set_initial_state+0x92/0x120
[  927.424137]  mmc_power_up.part.34+0x33/0x1d0
[  927.424141]  mmc_power_up+0x17/0x20
[  927.424147]  mmc_sdio_runtime_resume+0x2d/0x50
[  927.424151]  mmc_runtime_resume+0x17/0x20
[  927.424156]  __rpm_callback+0xc4/0x200
[  927.424161]  ? idr_alloc_cyclic+0x57/0xd0
[  927.424165]  ? mmc_runtime_suspend+0x20/0x20
[  927.424169]  rpm_callback+0x24/0x80
[  927.424172]  ? mmc_runtime_suspend+0x20/0x20
[  927.424176]  rpm_resume+0x4b3/0x6c0
[  927.424181]  __pm_runtime_resume+0x4e/0x80
[  927.424188]  driver_probe_device+0x41/0x490
[  927.424192]  __driver_attach+0xdf/0xf0
[  927.424196]  ? driver_probe_device+0x490/0x490
[  927.424201]  bus_for_each_dev+0x6c/0xc0
[  927.424205]  driver_attach+0x1e/0x20
[  927.424209]  bus_add_driver+0x1f4/0x270
[  927.424217]  ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio]
[  927.424221]  driver_register+0x60/0xe0
[  927.424227]  ? rsi_sdio_ack_intr+0x50/0x50 [rsi_sdio]
[  927.424231]  sdio_register_driver+0x20/0x30
[  927.424237]  rsi_module_init+0x16/0x40 [rsi_sdio]

Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 8428858204a6..fc895b466ebb 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -155,7 +155,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 	int err;
 	struct mmc_card *card = pfunction->card;
 	struct mmc_host *host = card->host;
-	s32 bit = (fls(host->ocr_avail) - 1);
 	u8 cmd52_resp;
 	u32 clock, resp, i;
 	u16 rca;
@@ -175,7 +174,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 	msleep(20);
 
 	/* Initialize the SDIO card */
-	host->ios.vdd = bit;
 	host->ios.chip_select = MMC_CS_DONTCARE;
 	host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
 	host->ios.power_mode = MMC_POWER_UP;

From 2dde48178ba5e66a2ac7235eaebad5655ae6db3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= <omosnace@redhat.com>
Date: Mon, 9 Apr 2018 10:00:06 +0200
Subject: [PATCH 218/519] audit: allow not equal op for audit by executable

[ Upstream commit 23bcc480dac204c7dbdf49d96b2c918ed98223c2 ]

Current implementation of auditing by executable name only implements
the 'equal' operator. This patch extends it to also support the 'not
equal' operator.

See: https://github.com/linux-audit/audit-kernel/issues/53

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/auditfilter.c | 2 +-
 kernel/auditsc.c     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b8ff9e193753..b57f929f1b46 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -406,7 +406,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
 			return -EINVAL;
 		break;
 	case AUDIT_EXE:
-		if (f->op != Audit_equal)
+		if (f->op != Audit_not_equal && f->op != Audit_equal)
 			return -EINVAL;
 		if (entry->rule.listnr != AUDIT_FILTER_EXIT)
 			return -EINVAL;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7444f95f3ee9..0fe8b337291a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -470,6 +470,8 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		case AUDIT_EXE:
 			result = audit_exe_compare(tsk, rule->exe);
+			if (f->op == Audit_not_equal)
+				result = !result;
 			break;
 		case AUDIT_UID:
 			result = audit_uid_comparator(cred->uid, f->op, f->uid);

From e9996e124ea742fb54b60b59329858bdc2918c9a Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Tue, 10 Apr 2018 15:05:42 +0200
Subject: [PATCH 219/519] microblaze: Fix simpleImage format generation

[ Upstream commit ece97f3a5fb50cf5f98886fbc63c9665f2bb199d ]

simpleImage generation was broken for some time. This patch is fixing
steps how simpleImage.*.ub file is generated. Steps are objdump of
vmlinux and create .ub.
Also make sure that there is striped elf version with .strip suffix.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/microblaze/boot/Makefile | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
index 91d2068da1b9..0f3fe6a151dc 100644
--- a/arch/microblaze/boot/Makefile
+++ b/arch/microblaze/boot/Makefile
@@ -21,17 +21,19 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
 quiet_cmd_cp = CP      $< $@$2
 	cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
 
-quiet_cmd_strip = STRIP   $@
+quiet_cmd_strip = STRIP   $< $@$2
 	cmd_strip = $(STRIP) -K microblaze_start -K _end -K __log_buf \
-				-K _fdt_start vmlinux -o $@
+				-K _fdt_start $< -o $@$2
 
 UIMAGE_LOADADDR = $(CONFIG_KERNEL_BASE_ADDR)
+UIMAGE_IN = $@
+UIMAGE_OUT = $@.ub
 
 $(obj)/simpleImage.%: vmlinux FORCE
 	$(call if_changed,cp,.unstrip)
 	$(call if_changed,objcopy)
 	$(call if_changed,uimage)
-	$(call if_changed,strip)
-	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+	$(call if_changed,strip,.strip)
+	@echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')'
 
 clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb

From 5f66271532ce7f00bc98aeaa945ca5cd22abb742 Mon Sep 17 00:00:00 2001
From: Dominik Bozek <dominikx.bozek@intel.com>
Date: Fri, 13 Apr 2018 10:42:31 -0700
Subject: [PATCH 220/519] usb: hub: Don't wait for connect state at resume for
 powered-off ports

[ Upstream commit 5d111f5190848d6fb1c414dc57797efea3526a2f ]

wait_for_connected() wait till a port change status to
USB_PORT_STAT_CONNECTION, but this is not possible if
the port is unpowered. The loop will only exit at timeout.

Such case take place if an over-current incident happen
while system is in S3. Then during resume wait_for_connected()
will wait 2s, which may be noticeable by the user.

Signed-off-by: Dominik Bozek <dominikx.bozek@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 93756664592a..2facffea2ee0 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3308,6 +3308,10 @@ static int wait_for_ss_port_enable(struct usb_device *udev,
 	while (delay_ms < 2000) {
 		if (status || *portstatus & USB_PORT_STAT_CONNECTION)
 			break;
+		if (!port_is_power_on(hub, *portstatus)) {
+			status = -ENODEV;
+			break;
+		}
 		msleep(20);
 		delay_ms += 20;
 		status = hub_port_status(hub, *port1, portstatus, portchange);

From d4ceb46ad29110a9065f8b35c41f780d7a98891a Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 3 Apr 2018 09:39:01 +0300
Subject: [PATCH 221/519] crypto: authencesn - don't leak pointers to authenc
 keys

[ Upstream commit 31545df391d58a3bb60e29b1192644a6f2b5a8dd ]

In crypto_authenc_esn_setkey we save pointers to the authenc keys
in a local variable of type struct crypto_authenc_keys and we don't
zeroize it after use. Fix this and don't leak pointers to the
authenc keys.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/authencesn.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 52154ef21b5e..fa0c4567f697 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -90,6 +90,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *
 					   CRYPTO_TFM_RES_MASK);
 
 out:
+	memzero_explicit(&keys, sizeof(keys));
 	return err;
 
 badkey:

From b573c815efbc285cc0828d871184ed935e92ec07 Mon Sep 17 00:00:00 2001
From: Tudor-Dan Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 3 Apr 2018 09:39:00 +0300
Subject: [PATCH 222/519] crypto: authenc - don't leak pointers to authenc keys

[ Upstream commit ad2fdcdf75d169e7a5aec6c7cb421c0bec8ec711 ]

In crypto_authenc_setkey we save pointers to the authenc keys in
a local variable of type struct crypto_authenc_keys and we don't
zeroize it after use. Fix this and don't leak pointers to the
authenc keys.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/authenc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/authenc.c b/crypto/authenc.c
index 55a354d57251..b7290c5b1eaa 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -108,6 +108,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 				       CRYPTO_TFM_RES_MASK);
 
 out:
+	memzero_explicit(&keys, sizeof(keys));
 	return err;
 
 badkey:

From ced0f4f3a5815ea2ed6de6ec1783871a44cd6295 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 14 Mar 2018 11:41:36 -0400
Subject: [PATCH 223/519] media: omap3isp: fix unbalanced dma_iommu_mapping

[ Upstream commit b7e1e6859fbf60519fd82d7120cee106a6019512 ]

The OMAP3 ISP driver manages its MMU mappings through the IOMMU-aware
ARM DMA backend. The current code creates a dma_iommu_mapping and
attaches this to the ISP device, but never detaches the mapping in
either the probe failure paths or the driver remove path resulting
in an unbalanced mapping refcount and a memory leak. Fix this properly.

Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Suman Anna <s-anna@ti.com>
Tested-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/omap3isp/isp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 56e683b19a73..91e02c1ff392 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -2077,6 +2077,7 @@ static int isp_initialize_modules(struct isp_device *isp)
 
 static void isp_detach_iommu(struct isp_device *isp)
 {
+	arm_iommu_detach_device(isp->dev);
 	arm_iommu_release_mapping(isp->mapping);
 	isp->mapping = NULL;
 	iommu_group_remove_device(isp->dev);
@@ -2110,8 +2111,7 @@ static int isp_attach_iommu(struct isp_device *isp)
 	mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G);
 	if (IS_ERR(mapping)) {
 		dev_err(isp->dev, "failed to create ARM IOMMU mapping\n");
-		ret = PTR_ERR(mapping);
-		goto error;
+		return PTR_ERR(mapping);
 	}
 
 	isp->mapping = mapping;
@@ -2126,7 +2126,8 @@ static int isp_attach_iommu(struct isp_device *isp)
 	return 0;
 
 error:
-	isp_detach_iommu(isp);
+	arm_iommu_release_mapping(isp->mapping);
+	isp->mapping = NULL;
 	return ret;
 }
 

From 87812af9bbe9d10c69478c749073a048b7f86672 Mon Sep 17 00:00:00 2001
From: Xose Vazquez Perez <xose.vazquez@gmail.com>
Date: Sat, 7 Apr 2018 00:47:23 +0200
Subject: [PATCH 224/519] scsi: scsi_dh: replace too broad "TP9" string with
 the exact models

[ Upstream commit 37b37d2609cb0ac267280ef27350b962d16d272e ]

SGI/TP9100 is not an RDAC array:
    ^^^
https://git.opensvc.com/gitweb.cgi?p=multipath-tools/.git;a=blob;f=libmultipath/hwtable.c;h=88b4700beb1d8940008020fbe4c3cd97d62f4a56;hb=HEAD#l235

This partially reverts commit 35204772ea03 ("[SCSI] scsi_dh_rdac :
Consolidate rdac strings together")

[mkp: fixed up the new entries to align with rest of struct]

Cc: NetApp RDAC team <ng-eseries-upstream-maintainers@netapp.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: James E.J. Bottomley <jejb@linux.vnet.ibm.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: SCSI ML <linux-scsi@vger.kernel.org>
Cc: DM ML <dm-devel@redhat.com>
Signed-off-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/scsi_dh.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c
index 5711d58f9e81..a8ebaeace154 100644
--- a/drivers/scsi/scsi_dh.c
+++ b/drivers/scsi/scsi_dh.c
@@ -58,7 +58,10 @@ static const struct scsi_dh_blist scsi_dh_blist[] = {
 	{"IBM", "3526",			"rdac", },
 	{"IBM", "3542",			"rdac", },
 	{"IBM", "3552",			"rdac", },
-	{"SGI", "TP9",			"rdac", },
+	{"SGI", "TP9300",		"rdac", },
+	{"SGI", "TP9400",		"rdac", },
+	{"SGI", "TP9500",		"rdac", },
+	{"SGI", "TP9700",		"rdac", },
 	{"SGI", "IS",			"rdac", },
 	{"STK", "OPENstorage",		"rdac", },
 	{"STK", "FLEXLINE 380",		"rdac", },

From dd476b5e5b4a42130a8a4c991ba31eeceffe6057 Mon Sep 17 00:00:00 2001
From: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Date: Fri, 6 Apr 2018 02:02:11 -0700
Subject: [PATCH 225/519] scsi: megaraid_sas: Increase timeout by 1 sec for
 non-RAID fastpath IOs

[ Upstream commit 3239b8cd28fd849a2023483257d35d68c5876c74 ]

Hardware could time out Fastpath IOs one second earlier than the timeout
provided by the host.

For non-RAID devices, driver provides timeout value based on OS provided
timeout value. Under certain scenarios, if the OS provides a timeout
value of 1 second, due to above behavior hardware will timeout
immediately.

Increase timeout value for non-RAID fastpath IOs by 1 second.

Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 96007633ad39..213944ed64d9 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -1886,6 +1886,9 @@ megasas_build_syspd_fusion(struct megasas_instance *instance,
 		pRAID_Context->timeoutValue = cpu_to_le16(os_timeout_value);
 		pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id);
 	} else {
+		if (os_timeout_value)
+			os_timeout_value++;
+
 		/* system pd Fast Path */
 		io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
 		timeout_limit = (scmd->device->type == TYPE_DISK) ?

From 3ff01af80d67326702a82aa80893c887a82501a2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Fri, 6 Apr 2018 07:54:51 -0400
Subject: [PATCH 226/519] media: si470x: fix __be16 annotations

[ Upstream commit 90db5c829692a0a7845e977e45719b4699216bd4 ]

The annotations there are wrong as warned:
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:107:35: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:129:24: warning: incorrect type in assignment (different base types)
   drivers/media/radio/si470x/radio-si470x-i2c.c:129:24:    expected unsigned short [unsigned] [short] <noident>
   drivers/media/radio/si470x/radio-si470x-i2c.c:129:24:    got restricted __be16 [usertype] <noident>
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16
   drivers/media/radio/si470x/radio-si470x-i2c.c:163:39: warning: cast to restricted __be16

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/radio/si470x/radio-si470x-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c
index 471d6a8ae8a4..9326439bc49c 100644
--- a/drivers/media/radio/si470x/radio-si470x-i2c.c
+++ b/drivers/media/radio/si470x/radio-si470x-i2c.c
@@ -96,7 +96,7 @@ MODULE_PARM_DESC(max_rds_errors, "RDS maximum block errors: *1*");
  */
 int si470x_get_register(struct si470x_device *radio, int regnr)
 {
-	u16 buf[READ_REG_NUM];
+	__be16 buf[READ_REG_NUM];
 	struct i2c_msg msgs[1] = {
 		{
 			.addr = radio->client->addr,
@@ -121,7 +121,7 @@ int si470x_get_register(struct si470x_device *radio, int regnr)
 int si470x_set_register(struct si470x_device *radio, int regnr)
 {
 	int i;
-	u16 buf[WRITE_REG_NUM];
+	__be16 buf[WRITE_REG_NUM];
 	struct i2c_msg msgs[1] = {
 		{
 			.addr = radio->client->addr,
@@ -151,7 +151,7 @@ int si470x_set_register(struct si470x_device *radio, int regnr)
 static int si470x_get_all_registers(struct si470x_device *radio)
 {
 	int i;
-	u16 buf[READ_REG_NUM];
+	__be16 buf[READ_REG_NUM];
 	struct i2c_msg msgs[1] = {
 		{
 			.addr = radio->client->addr,

From 5a3d1d67b3548e9dc1572c87527fa35b309feb0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Wed, 28 Mar 2018 15:30:37 -0700
Subject: [PATCH 227/519] drm: Add DP PSR2 sink enable bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 4f212e40468650e220c1770876c7f25b8e0c1ff5 ]

To comply with eDP1.4a this bit should be set when enabling PSR2.

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180328223046.16125-1-jose.souza@intel.com
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/drm/drm_dp_helper.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index bb9d0deca07c..0fb4975fae91 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -342,6 +342,7 @@
 # define DP_PSR_FRAME_CAPTURE		    (1 << 3)
 # define DP_PSR_SELECTIVE_UPDATE	    (1 << 4)
 # define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS     (1 << 5)
+# define DP_PSR_ENABLE_PSR2		    (1 << 6) /* eDP 1.4a */
 
 #define DP_ADAPTER_CTRL			    0x1a0
 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE   (1 << 0)

From 1ed4ccaf052ec2fe00043759a284921e468687c3 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 14 Jul 2018 23:55:57 -0400
Subject: [PATCH 228/519] random: mix rdrand with entropy sent in from
 userspace

commit 81e69df38e2911b642ec121dec319fad2a4782f3 upstream.

Fedora has integrated the jitter entropy daemon to work around slow
boot problems, especially on VM's that don't support virtio-rng:

    https://bugzilla.redhat.com/show_bug.cgi?id=1572944

It's understandable why they did this, but the Jitter entropy daemon
works fundamentally on the principle: "the CPU microarchitecture is
**so** complicated and we can't figure it out, so it *must* be
random".  Yes, it uses statistical tests to "prove" it is secure, but
AES_ENCRYPT(NSA_KEY, COUNTER++) will also pass statistical tests with
flying colors.

So if RDRAND is available, mix it into entropy submitted from
userspace.  It can't hurt, and if you believe the NSA has backdoored
RDRAND, then they probably have enough details about the Intel
microarchitecture that they can reverse engineer how the Jitter
entropy daemon affects the microarchitecture, and attack its output
stream.  And if RDRAND is in fact an honest DRNG, it will immeasurably
improve on what the Jitter entropy daemon might produce.

This also provides some protection against someone who is able to read
or set the entropy seed file.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/random.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index dffd06a3bb76..2916d08ee30e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1503,14 +1503,22 @@ static int
 write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
 {
 	size_t bytes;
-	__u32 buf[16];
+	__u32 t, buf[16];
 	const char __user *p = buffer;
 
 	while (count > 0) {
+		int b, i = 0;
+
 		bytes = min(count, sizeof(buf));
 		if (copy_from_user(&buf, p, bytes))
 			return -EFAULT;
 
+		for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) {
+			if (!arch_get_random_int(&t))
+				break;
+			buf[i] ^= t;
+		}
+
 		count -= bytes;
 		p += bytes;
 

From d0f02f70b31306f98619aa7532613008507deda5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 29 Jul 2018 12:44:46 -0700
Subject: [PATCH 229/519] squashfs: be more careful about metadata corruption

commit 01cfb7937a9af2abb1136c7e89fbf3fd92952956 upstream.

Anatoly Trosinenko reports that a corrupted squashfs image can cause a
kernel oops.  It turns out that squashfs can end up being confused about
negative fragment lengths.

The regular squashfs_read_data() does check for negative lengths, but
squashfs_read_metadata() did not, and the fragment size code just
blindly trusted the on-disk value.  Fix both the fragment parsing and
the metadata reading code.

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Phillip Lougher <phillip@squashfs.org.uk>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/squashfs/cache.c       | 3 +++
 fs/squashfs/file.c        | 8 ++++++--
 fs/squashfs/fragment.c    | 4 +---
 fs/squashfs/squashfs_fs.h | 6 ++++++
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1cb70a0b2168..91ce49c05b7c 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
 
 	TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
 
+	if (unlikely(length < 0))
+		return -EIO;
+
 	while (length) {
 		entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
 		if (entry->error) {
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e5c9689062ba..1ec7bae2751d 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n,
 		}
 
 		for (i = 0; i < blocks; i++) {
-			int size = le32_to_cpu(blist[i]);
+			int size = squashfs_block_size(blist[i]);
+			if (size < 0) {
+				err = size;
+				goto failure;
+			}
 			block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
 		}
 		n -= blocks;
@@ -367,7 +371,7 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
 			sizeof(size));
 	if (res < 0)
 		return res;
-	return le32_to_cpu(size);
+	return squashfs_block_size(size);
 }
 
 /* Copy data into page cache  */
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 0ed6edbc5c71..86ad9a4b8c36 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -61,9 +61,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
 		return size;
 
 	*fragment_block = le64_to_cpu(fragment_entry.start_block);
-	size = le32_to_cpu(fragment_entry.size);
-
-	return size;
+	return squashfs_block_size(fragment_entry.size);
 }
 
 
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 506f4ba5b983..e66486366f02 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -129,6 +129,12 @@
 
 #define SQUASHFS_COMPRESSED_BLOCK(B)	(!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK))
 
+static inline int squashfs_block_size(__le32 raw)
+{
+	u32 size = le32_to_cpu(raw);
+	return (size >> 25) ? -EIO : size;
+}
+
 /*
  * Inode number ops.  Inodes consist of a compressed block number, and an
  * uncompressed offset within that block

From 5afdb4536020da5625872ea0a642fbeaf8869ada Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 10 Jul 2018 01:07:43 -0400
Subject: [PATCH 230/519] ext4: fix inline data updates with checksums enabled

commit 362eca70b53389bddf3143fe20f53dcce2cfdf61 upstream.

The inline data code was updating the raw inode directly; this is
problematic since if metadata checksums are enabled,
ext4_mark_inode_dirty() must be called to update the inode's checksum.
In addition, the jbd2 layer requires that get_write_access() be called
before the metadata buffer is modified.  Fix both of these problems.

https://bugzilla.kernel.org/show_bug.cgi?id=200443

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inline.c | 19 +++++++++++--------
 fs/ext4/inode.c  | 16 +++++++---------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index e72f53a89764..c449bc089c94 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -678,6 +678,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
 		goto convert;
 	}
 
+	ret = ext4_journal_get_write_access(handle, iloc.bh);
+	if (ret)
+		goto out;
+
 	flags |= AOP_FLAG_NOFS;
 
 	page = grab_cache_page_write_begin(mapping, 0, flags);
@@ -706,7 +710,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
 out_up_read:
 	up_read(&EXT4_I(inode)->xattr_sem);
 out:
-	if (handle)
+	if (handle && (ret != 1))
 		ext4_journal_stop(handle);
 	brelse(iloc.bh);
 	return ret;
@@ -748,6 +752,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
 
 	ext4_write_unlock_xattr(inode, &no_expand);
 	brelse(iloc.bh);
+	mark_inode_dirty(inode);
 out:
 	return copied;
 }
@@ -894,7 +899,6 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 		goto out;
 	}
 
-
 	page = grab_cache_page_write_begin(mapping, 0, flags);
 	if (!page) {
 		ret = -ENOMEM;
@@ -912,6 +916,9 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 		if (ret < 0)
 			goto out_release_page;
 	}
+	ret = ext4_journal_get_write_access(handle, iloc.bh);
+	if (ret)
+		goto out_release_page;
 
 	up_read(&EXT4_I(inode)->xattr_sem);
 	*pagep = page;
@@ -932,7 +939,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 				  unsigned len, unsigned copied,
 				  struct page *page)
 {
-	int i_size_changed = 0;
 	int ret;
 
 	ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
@@ -950,10 +956,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 	 * But it's important to update i_size while still holding page lock:
 	 * page writeout could otherwise come in and zero beyond i_size.
 	 */
-	if (pos+copied > inode->i_size) {
+	if (pos+copied > inode->i_size)
 		i_size_write(inode, pos+copied);
-		i_size_changed = 1;
-	}
 	unlock_page(page);
 	page_cache_release(page);
 
@@ -963,8 +967,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 	 * ordering of page lock and transaction start for journaling
 	 * filesystems.
 	 */
-	if (i_size_changed)
-		mark_inode_dirty(inode);
+	mark_inode_dirty(inode);
 
 	return copied;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b507de0e4bbf..181db3c7f5d1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1164,9 +1164,10 @@ static int ext4_write_end(struct file *file,
 	loff_t old_size = inode->i_size;
 	int ret = 0, ret2;
 	int i_size_changed = 0;
+	int inline_data = ext4_has_inline_data(inode);
 
 	trace_ext4_write_end(inode, pos, len, copied);
-	if (ext4_has_inline_data(inode)) {
+	if (inline_data) {
 		ret = ext4_write_inline_data_end(inode, pos, len,
 						 copied, page);
 		if (ret < 0) {
@@ -1194,7 +1195,7 @@ static int ext4_write_end(struct file *file,
 	 * ordering of page lock and transaction start for journaling
 	 * filesystems.
 	 */
-	if (i_size_changed)
+	if (i_size_changed || inline_data)
 		ext4_mark_inode_dirty(handle, inode);
 
 	if (pos + len > inode->i_size && ext4_can_truncate(inode))
@@ -1268,6 +1269,7 @@ static int ext4_journalled_write_end(struct file *file,
 	int partial = 0;
 	unsigned from, to;
 	int size_changed = 0;
+	int inline_data = ext4_has_inline_data(inode);
 
 	trace_ext4_journalled_write_end(inode, pos, len, copied);
 	from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1275,7 +1277,7 @@ static int ext4_journalled_write_end(struct file *file,
 
 	BUG_ON(!ext4_handle_valid(handle));
 
-	if (ext4_has_inline_data(inode)) {
+	if (inline_data) {
 		ret = ext4_write_inline_data_end(inode, pos, len,
 						 copied, page);
 		if (ret < 0) {
@@ -1306,7 +1308,7 @@ static int ext4_journalled_write_end(struct file *file,
 	if (old_size < pos)
 		pagecache_isize_extended(inode, old_size, pos);
 
-	if (size_changed) {
+	if (size_changed || inline_data) {
 		ret2 = ext4_mark_inode_dirty(handle, inode);
 		if (!ret)
 			ret = ret2;
@@ -1804,11 +1806,7 @@ static int __ext4_journalled_writepage(struct page *page,
 	}
 
 	if (inline_data) {
-		BUFFER_TRACE(inode_bh, "get write access");
-		ret = ext4_journal_get_write_access(handle, inode_bh);
-
-		err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
-
+		ret = ext4_mark_inode_dirty(handle, inode);
 	} else {
 		ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
 					     do_journal_get_write_access);

From a66e985716add0a2bae8b94e8f18ecaf59bd44eb Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 12 Jul 2018 19:08:05 -0400
Subject: [PATCH 231/519] ext4: check for allocation block validity with block
 group locked

commit 8d5a803c6a6ce4ec258e31f76059ea5153ba46ef upstream.

With commit 044e6e3d74a3: "ext4: don't update checksum of new
initialized bitmaps" the buffer valid bit will get set without
actually setting up the checksum for the allocation bitmap, since the
checksum will get calculated once we actually allocate an inode or
block.

If we are doing this, then we need to (re-)check the verified bit
after we take the block group lock.  Otherwise, we could race with
another process reading and verifying the bitmap, which would then
complain about the checksum being invalid.

https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1780137

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/balloc.c | 3 +++
 fs/ext4/ialloc.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 092da164bdc0..e0fb7cdcee89 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -378,6 +378,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
 		return -EFSCORRUPTED;
 
 	ext4_lock_group(sb, block_group);
+	if (buffer_verified(bh))
+		goto verified;
 	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
 			desc, bh))) {
 		ext4_unlock_group(sb, block_group);
@@ -400,6 +402,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
 		return -EFSCORRUPTED;
 	}
 	set_buffer_verified(bh);
+verified:
 	ext4_unlock_group(sb, block_group);
 	return 0;
 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 48d818eba9c3..041117fd8fd7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -88,6 +88,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
 		return -EFSCORRUPTED;
 
 	ext4_lock_group(sb, block_group);
+	if (buffer_verified(bh))
+		goto verified;
 	blk = ext4_inode_bitmap(sb, desc);
 	if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
 					   EXT4_INODES_PER_GROUP(sb) / 8)) {
@@ -105,6 +107,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
 		return -EFSBADCRC;
 	}
 	set_buffer_verified(bh);
+verified:
 	ext4_unlock_group(sb, block_group);
 	return 0;
 }

From b4cb2f0fd456f4eca58c9a84639aa8219c319cda Mon Sep 17 00:00:00 2001
From: Eric Engestrom <eric.engestrom@imgtec.com>
Date: Mon, 25 Apr 2016 10:47:56 +0100
Subject: [PATCH 232/519] dmaengine: pxa_dma: remove duplicate const qualifier

commit 4e0def887d717598ae8062b46e55f9e00d3a5783 upstream.

Signed-off-by: Eric Engestrom <eric.engestrom@imgtec.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Cc: Andrea Adami <andrea.adami@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/pxa_dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index 55f5d33f6dc7..4251e9ac0373 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -1321,7 +1321,7 @@ static int pxad_init_phys(struct platform_device *op,
 	return 0;
 }
 
-static const struct of_device_id const pxad_dt_ids[] = {
+static const struct of_device_id pxad_dt_ids[] = {
 	{ .compatible = "marvell,pdma-1.0", },
 	{}
 };

From bb0376b6b6705503bd9b5706816213d260e1bde9 Mon Sep 17 00:00:00 2001
From: Andrea Adami <andrea.adami@gmail.com>
Date: Fri, 6 May 2016 17:27:34 +0200
Subject: [PATCH 233/519] ASoC: pxa: Fix module autoload for platform drivers

commit e5b7d71aa5b32180adec49a17c752e577c68f740 upstream.

These platform drivers are lacking MODULE_ALIAS so module autoloading
doesn't work. Tested on corgi and poodle with kernel 4.4.

Signed-off-by: Andrea Adami <andrea.adami@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/pxa/brownstone.c     | 1 +
 sound/soc/pxa/mioa701_wm9713.c | 1 +
 sound/soc/pxa/mmp-pcm.c        | 1 +
 sound/soc/pxa/mmp-sspa.c       | 1 +
 sound/soc/pxa/palm27x.c        | 1 +
 sound/soc/pxa/pxa-ssp.c        | 1 +
 sound/soc/pxa/pxa2xx-ac97.c    | 1 +
 sound/soc/pxa/pxa2xx-pcm.c     | 1 +
 8 files changed, 8 insertions(+)

diff --git a/sound/soc/pxa/brownstone.c b/sound/soc/pxa/brownstone.c
index 6147e86e9b0f..55ca9c9364b8 100644
--- a/sound/soc/pxa/brownstone.c
+++ b/sound/soc/pxa/brownstone.c
@@ -136,3 +136,4 @@ module_platform_driver(mmp_driver);
 MODULE_AUTHOR("Leo Yan <leoy@marvell.com>");
 MODULE_DESCRIPTION("ALSA SoC Brownstone");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:brownstone-audio");
diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c
index 29bc60e85e92..6cd28f95d548 100644
--- a/sound/soc/pxa/mioa701_wm9713.c
+++ b/sound/soc/pxa/mioa701_wm9713.c
@@ -203,3 +203,4 @@ module_platform_driver(mioa701_wm9713_driver);
 MODULE_AUTHOR("Robert Jarzmik (rjarzmik@free.fr)");
 MODULE_DESCRIPTION("ALSA SoC WM9713 MIO A701");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mioa701-wm9713");
diff --git a/sound/soc/pxa/mmp-pcm.c b/sound/soc/pxa/mmp-pcm.c
index 51e790d006f5..96df9b2d8fc4 100644
--- a/sound/soc/pxa/mmp-pcm.c
+++ b/sound/soc/pxa/mmp-pcm.c
@@ -248,3 +248,4 @@ module_platform_driver(mmp_pcm_driver);
 MODULE_AUTHOR("Leo Yan <leoy@marvell.com>");
 MODULE_DESCRIPTION("MMP Soc Audio DMA module");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mmp-pcm-audio");
diff --git a/sound/soc/pxa/mmp-sspa.c b/sound/soc/pxa/mmp-sspa.c
index eca60c29791a..ca8b23f8c525 100644
--- a/sound/soc/pxa/mmp-sspa.c
+++ b/sound/soc/pxa/mmp-sspa.c
@@ -482,3 +482,4 @@ module_platform_driver(asoc_mmp_sspa_driver);
 MODULE_AUTHOR("Leo Yan <leoy@marvell.com>");
 MODULE_DESCRIPTION("MMP SSPA SoC Interface");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mmp-sspa-dai");
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index 4e74d9573f03..bcc81e920a67 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -161,3 +161,4 @@ module_platform_driver(palm27x_wm9712_driver);
 MODULE_AUTHOR("Marek Vasut <marek.vasut@gmail.com>");
 MODULE_DESCRIPTION("ALSA SoC Palm T|X, T5 and LifeDrive");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:palm27x-asoc");
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index da03fad1b9cd..3cad990dad2c 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -833,3 +833,4 @@ module_platform_driver(asoc_ssp_driver);
 MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
 MODULE_DESCRIPTION("PXA SSP/PCM SoC Interface");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:pxa-ssp-dai");
diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c
index f3de615aacd7..9615e6de1306 100644
--- a/sound/soc/pxa/pxa2xx-ac97.c
+++ b/sound/soc/pxa/pxa2xx-ac97.c
@@ -287,3 +287,4 @@ module_platform_driver(pxa2xx_ac97_driver);
 MODULE_AUTHOR("Nicolas Pitre");
 MODULE_DESCRIPTION("AC97 driver for the Intel PXA2xx chip");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:pxa2xx-ac97");
diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c
index 9f390398d518..410d48b93031 100644
--- a/sound/soc/pxa/pxa2xx-pcm.c
+++ b/sound/soc/pxa/pxa2xx-pcm.c
@@ -117,3 +117,4 @@ module_platform_driver(pxa_pcm_driver);
 MODULE_AUTHOR("Nicolas Pitre");
 MODULE_DESCRIPTION("Intel PXA2xx PCM DMA module");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:pxa-pcm-audio");

From 33fbeee10538755e4955a18da0dde690be02073b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 27 Jul 2018 18:15:46 +0200
Subject: [PATCH 234/519] ipv4: remove BUG_ON() from fib_compute_spec_dst

[ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ]

Remove BUG_ON() from fib_compute_spec_dst routine and check
in_dev pointer during flowi4 data structure initialization.
fib_compute_spec_dst routine can be run concurrently with device removal
where ip_ptr net_device pointer is set to NULL. This can happen
if userspace enables pkt info on UDP rx socket and the device
is removed while traffic is flowing

Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_frontend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8f05816a8be2..015c33712803 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -289,19 +289,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 		return ip_hdr(skb)->daddr;
 
 	in_dev = __in_dev_get_rcu(dev);
-	BUG_ON(!in_dev);
 
 	net = dev_net(dev);
 
 	scope = RT_SCOPE_UNIVERSE;
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
+		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
 		struct flowi4 fl4 = {
 			.flowi4_iif = LOOPBACK_IFINDEX,
 			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
 			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
 			.flowi4_scope = scope,
-			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
+			.flowi4_mark = vmark ? skb->mark : 0,
 		};
 		if (!fib_lookup(net, &fl4, &res, 0))
 			return FIB_RES_PREFSRC(net, res);

From 3c3deb06ebda981b65be2abfd5980e23b1519dff Mon Sep 17 00:00:00 2001
From: tangpengpeng <tangpengpeng@higon.com>
Date: Thu, 26 Jul 2018 14:45:16 +0800
Subject: [PATCH 235/519] net: fix amd-xgbe flow-control issue

[ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ]

If we enable or disable xgbe flow-control by ethtool ,
it does't work.Because the parameter is not properly
assigned,so we need to adjust the assignment order
of the parameters.

Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic")
Signed-off-by: tangpengpeng <tangpengpeng@higon.com>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 446058081866..7a0ab4c44ee4 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -872,14 +872,14 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata)
 
 		if (pdata->tx_pause != pdata->phy.tx_pause) {
 			new_state = 1;
-			pdata->hw_if.config_tx_flow_control(pdata);
 			pdata->tx_pause = pdata->phy.tx_pause;
+			pdata->hw_if.config_tx_flow_control(pdata);
 		}
 
 		if (pdata->rx_pause != pdata->phy.rx_pause) {
 			new_state = 1;
-			pdata->hw_if.config_rx_flow_control(pdata);
 			pdata->rx_pause = pdata->phy.rx_pause;
+			pdata->hw_if.config_rx_flow_control(pdata);
 		}
 
 		/* Speed support */

From d1fc12d8475ce4538b03f4901d6a65b639700e27 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sat, 28 Jul 2018 09:52:10 +0200
Subject: [PATCH 236/519] net: lan78xx: fix rx handling before first packet is
 send

[ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ]

As long the bh tasklet isn't scheduled once, no packet from the rx path
will be handled. Since the tx path also schedule the same tasklet
this situation only persits until the first packet transmission.
So fix this issue by scheduling the tasklet after link reset.

Link: https://github.com/raspberrypi/linux/issues/2617
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet")
Suggested-by: Floris Bos <bos@je-eigen-domein.nl>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/lan78xx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index a6d429950cb0..acec4b565511 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1361,6 +1361,8 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
 			netif_dbg(dev, ifup, dev->net,
 				  "MAC address set to random addr");
 		}
+
+		tasklet_schedule(&dev->bh);
 	}
 
 	ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);

From f6384b2517429e1666a79862fef895de3e2a2cd4 Mon Sep 17 00:00:00 2001
From: Xiao Liang <xiliang@redhat.com>
Date: Fri, 27 Jul 2018 17:56:08 +0800
Subject: [PATCH 237/519] xen-netfront: wait xenbus state change when load
 module manually

[ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ]

When loading module manually, after call xenbus_switch_state to initializes
the state of the netfront device, the driver state did not change so fast
that may lead no dev created in latest kernel. This patch adds wait to make
sure xenbus knows the driver is not in closed/unknown state.

Current state:
[vm]# ethtool eth0
Settings for eth0:
	Link detected: yes
[vm]# modprobe -r xen_netfront
[vm]# modprobe  xen_netfront
[vm]# ethtool eth0
Settings for eth0:
Cannot get device settings: No such device
Cannot get wake-on-lan settings: No such device
Cannot get message level: No such device
Cannot get link status: No such device
No data available

With the patch installed.
[vm]# ethtool eth0
Settings for eth0:
	Link detected: yes
[vm]# modprobe -r xen_netfront
[vm]# modprobe xen_netfront
[vm]# ethtool eth0
Settings for eth0:
	Link detected: yes

Signed-off-by: Xiao Liang <xiliang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 3bb3d6d9117c..bec9f099573b 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -86,6 +86,7 @@ struct netfront_cb {
 /* IRQ name is queue name with "-tx" or "-rx" appended */
 #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
 
+static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
 static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
 
 struct netfront_stats {
@@ -1335,6 +1336,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_carrier_off(netdev);
 
 	xenbus_switch_state(dev, XenbusStateInitialising);
+	wait_event(module_load_q,
+			   xenbus_read_driver_state(dev->otherend) !=
+			   XenbusStateClosed &&
+			   xenbus_read_driver_state(dev->otherend) !=
+			   XenbusStateUnknown);
 	return netdev;
 
  exit:

From 6f4a86ce5d04d57b17b7b987eb27038bf9c3a1a5 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Thu, 26 Jul 2018 15:05:37 +0300
Subject: [PATCH 238/519] NET: stmmac: align DMA stuff to largest cache line
 length

[ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ]

As for today STMMAC_ALIGN macro (which is used to align DMA stuff)
relies on L1 line length (L1_CACHE_BYTES).
This isn't correct in case of system with several cache levels
which might have L1 cache line length smaller than L2 line. This
can lead to sharing one cache line between DMA buffer and other
data, so we can lose this data while invalidate DMA buffer before
DMA transaction.

Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for
aligning.

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5adaf537513b..7bba30f24135 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -54,7 +54,7 @@
 #include <linux/reset.h>
 #include <linux/of_mdio.h>
 
-#define STMMAC_ALIGN(x)	L1_CACHE_ALIGN(x)
+#define	STMMAC_ALIGN(x)		__ALIGN_KERNEL(x, SMP_CACHE_BYTES)
 
 /* Module parameters */
 #define TX_TIMEO	5000

From e2f337e2bd4efe32051a496a7fcdd94ea67c0cfa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 May 2018 14:47:25 -0700
Subject: [PATCH 239/519] tcp: do not force quickack when receiving
 out-of-order packets

[ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ]

As explained in commit 9f9843a751d0 ("tcp: properly handle stretch
acks in slow start"), TCP stacks have to consider how many packets
are acknowledged in one single ACK, because of GRO, but also
because of ACK compression or losses.

We plan to add SACK compression in the following patch, we
must therefore not call tcp_enter_quickack_mode()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5c645069a09a..2315701036fa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4649,8 +4649,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
 		goto out_of_window;
 
-	tcp_enter_quickack_mode(sk);
-
 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		/* Partial packet, seq < rcv_next < end_seq */
 		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",

From 2b30c04bc6f9e7be2d9a5e1b504faa904154c7da Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 May 2018 15:08:56 -0700
Subject: [PATCH 240/519] tcp: add max_quickacks param to tcp_incr_quickack and
 tcp_enter_quickack_mode

[ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ]

We want to add finer control of the number of ACK packets sent after
ECN events.

This patch is not changing current behavior, it only enables following
change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h    |  2 +-
 net/ipv4/tcp_dctcp.c |  4 ++--
 net/ipv4/tcp_input.c | 24 +++++++++++++-----------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 65babd8a682d..cac4a6ad5db3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -376,7 +376,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
-void tcp_enter_quickack_mode(struct sock *sk);
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e63b764e55ea..6300edf90e60 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 		 */
 		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
 			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, 1);
 	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 		 */
 		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
 			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, 1);
 	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2315701036fa..5f0ef0b7ef37 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -176,21 +176,23 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static void tcp_incr_quickack(struct sock *sk)
+static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
 
 	if (quickacks == 0)
 		quickacks = 2;
+	quickacks = min(quickacks, max_quickacks);
 	if (quickacks > icsk->icsk_ack.quick)
-		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+		icsk->icsk_ack.quick = quickacks;
 }
 
-void tcp_enter_quickack_mode(struct sock *sk)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	tcp_incr_quickack(sk);
+
+	tcp_incr_quickack(sk, max_quickacks);
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
@@ -235,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp);
+			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn((struct sock *)tp))
@@ -243,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp);
+			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;
@@ -651,7 +653,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 		/* The _first_ data packet received, initialize
 		 * delayed ACK engine.
 		 */
-		tcp_incr_quickack(sk);
+		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
 		icsk->icsk_ack.ato = TCP_ATO_MIN;
 	} else {
 		int m = now - icsk->icsk_ack.lrcvtime;
@@ -667,7 +669,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 			/* Too long gap. Apparently sender failed to
 			 * restart window, so that we send ACKs quickly.
 			 */
-			tcp_incr_quickack(sk);
+			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
 			sk_mem_reclaim(sk);
 		}
 	}
@@ -4136,7 +4138,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 
 		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -4638,7 +4640,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
 out_of_window:
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_schedule_ack(sk);
 drop:
 		__kfree_skb(skb);
@@ -5674,7 +5676,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			 * to stand against the temptation 8)     --ANK
 			 */
 			inet_csk_schedule_ack(sk);
-			tcp_enter_quickack_mode(sk);
+			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
 

From 96b792d199d17545d6a53faf44b9c91d038f1ab3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 May 2018 15:08:57 -0700
Subject: [PATCH 241/519] tcp: do not aggressively quick ack after ECN events

[ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ]

ECN signals currently forces TCP to enter quickack mode for
up to 16 (TCP_MAX_QUICKACKS) following incoming packets.

We believe this is not needed, and only sending one immediate ack
for the current packet should be enough.

This should reduce the extra load noticed in DCTCP environments,
after congestion events.

This is part 2 of our effort to reduce pure ACK packets.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5f0ef0b7ef37..3dc61a64022b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+			tcp_enter_quickack_mode((struct sock *)tp, 1);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn((struct sock *)tp))
@@ -245,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+			tcp_enter_quickack_mode((struct sock *)tp, 1);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;

From cd760ab9f4e13aedccc80f19a0b7863d5c0b3c8c Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Mon, 4 Jun 2018 15:29:51 -0700
Subject: [PATCH 242/519] tcp: refactor tcp_ecn_check_ce to remove sk type cast

[ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ]

Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock*
instead of tcp_sock* to clean up type casts. This is a pure refactor
patch.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3dc61a64022b..0754e30abfab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -228,8 +228,10 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
 	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 }
 
-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
 	case INET_ECN_NOT_ECT:
 		/* Funny extension: if ECT is not set on a segment,
@@ -237,31 +239,31 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp, 1);
+			tcp_enter_quickack_mode(sk, 1);
 		break;
 	case INET_ECN_CE:
-		if (tcp_ca_needs_ecn((struct sock *)tp))
-			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
+		if (tcp_ca_needs_ecn(sk))
+			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp, 1);
+			tcp_enter_quickack_mode(sk, 1);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;
 		break;
 	default:
-		if (tcp_ca_needs_ecn((struct sock *)tp))
-			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
+		if (tcp_ca_needs_ecn(sk))
+			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
 		tp->ecn_flags |= TCP_ECN_SEEN;
 		break;
 	}
 }
 
-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 {
-	if (tp->ecn_flags & TCP_ECN_OK)
-		__tcp_ecn_check_ce(tp, skb);
+	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
+		__tcp_ecn_check_ce(sk, skb);
 }
 
 static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
@@ -675,7 +677,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 	}
 	icsk->icsk_ack.lrcvtime = now;
 
-	tcp_ecn_check_ce(tp, skb);
+	tcp_ecn_check_ce(sk, skb);
 
 	if (skb->len >= 128)
 		tcp_grow_window(sk, skb);
@@ -4366,7 +4368,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	struct sk_buff *skb1;
 	u32 seq, end_seq;
 
-	tcp_ecn_check_ce(tp, skb);
+	tcp_ecn_check_ce(sk, skb);
 
 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);

From 27a0762cb570834dc44155363c118cabdd024c3c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 27 Jun 2018 08:47:21 -0700
Subject: [PATCH 243/519] tcp: add one more quick ack after after ECN events

[ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ]

Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/
tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink
about our recent patch removing ~16 quick acks after ECN events.

tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent,
but in the case the sender cwnd was lowered to 1, we do not want
to have a delayed ack for the next packet we will receive.

Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0754e30abfab..4a261e078082 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -239,7 +239,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn(sk))
@@ -247,7 +247,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;

From df30bfccc463082cfc2a5b164e5590403f16af94 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 30 Jul 2018 20:09:11 -0700
Subject: [PATCH 244/519] inet: frag: enforce memory limits earlier

[ Upstream commit 56e2c94f055d328f5f6b0a5c1721cca2f2d4e0a1 ]

We currently check current frags memory usage only when
a new frag queue is created. This allows attackers to first
consume the memory budget (default : 4 MB) creating thousands
of frag queues, then sending tiny skbs to exceed high_thresh
limit by 2 to 3 order of magnitude.

Note that before commit 648700f76b03 ("inet: frags: use rhashtables
for reassembly units"), work queue could be starved under DOS,
getting no cpu cycles.
After commit 648700f76b03, only the per frag queue timer can eventually
remove an incomplete frag queue and its skbs.

Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Peter Oskolkov <posk@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/inet_fragment.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b34fa1bb278f..b2001b20e029 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -364,11 +364,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 {
 	struct inet_frag_queue *q;
 
-	if (frag_mem_limit(nf) > nf->high_thresh) {
-		inet_frag_schedule_worker(f);
-		return NULL;
-	}
-
 	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
 	if (!q)
 		return NULL;
@@ -405,6 +400,11 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	struct inet_frag_queue *q;
 	int depth = 0;
 
+	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
+		inet_frag_schedule_worker(f);
+		return NULL;
+	}
+
 	if (frag_mem_limit(nf) > nf->low_thresh)
 		inet_frag_schedule_worker(f);
 

From b5fef54e3212e2ec1449a4445becb25208c9410c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 31 Jul 2018 17:12:52 -0700
Subject: [PATCH 245/519] net: dsa: Do not suspend/resume closed slave_dev

[ Upstream commit a94c689e6c9e72e722f28339e12dff191ee5a265 ]

If a DSA slave network device was previously disabled, there is no need
to suspend or resume it.

Fixes: 2446254915a7 ("net: dsa: allow switch drivers to implement suspend/resume hooks")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dsa/slave.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 554c2a961ad5..48b28a7ecc7a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1099,6 +1099,9 @@ int dsa_slave_suspend(struct net_device *slave_dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
 
+	if (!netif_running(slave_dev))
+		return 0;
+
 	netif_device_detach(slave_dev);
 
 	if (p->phy) {
@@ -1116,6 +1119,9 @@ int dsa_slave_resume(struct net_device *slave_dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
 
+	if (!netif_running(slave_dev))
+		return 0;
+
 	netif_device_attach(slave_dev);
 
 	if (p->phy) {

From 8cac0ce0a8853cae7dc01256f88bc9b7e53ad3ce Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Tue, 31 Jul 2018 21:13:16 +0000
Subject: [PATCH 246/519] netlink: Fix spectre v1 gadget in netlink_create()

[ Upstream commit bc5b6c0b62b932626a135f516a41838c510c6eba ]

'protocol' is a user-controlled value, so sanitize it after the bounds
check to avoid using it for speculative out-of-bounds access to arrays
indexed by it.

This addresses the following accesses detected with the help of smatch:

* net/netlink/af_netlink.c:654 __netlink_create() warn: potential
  spectre issue 'nlk_cb_mutex_keys' [w]

* net/netlink/af_netlink.c:654 __netlink_create() warn: potential
  spectre issue 'nlk_cb_mutex_key_strings' [w]

* net/netlink/af_netlink.c:685 netlink_create() warn: potential spectre
  issue 'nl_table' [w] (local cap)

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 818400fddc9b..9708fff318d5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -62,6 +62,7 @@
 #include <asm/cacheflush.h>
 #include <linux/hash.h>
 #include <linux/genetlink.h>
+#include <linux/nospec.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -654,6 +655,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 
 	if (protocol < 0 || protocol >= MAX_LINKS)
 		return -EPROTONOSUPPORT;
+	protocol = array_index_nospec(protocol, MAX_LINKS);
 
 	netlink_lock_table();
 #ifdef CONFIG_MODULES

From dac2939e629e092b9c65a6242f1b1c018e811dc8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 30 Jul 2018 14:27:15 -0700
Subject: [PATCH 247/519] squashfs: more metadata hardening

commit d512584780d3e6a7cacb2f482834849453d444a1 upstream.

Anatoly reports another squashfs fuzzing issue, where the decompression
parameters themselves are in a compressed block.

This causes squashfs_read_data() to be called in order to read the
decompression options before the decompression stream having been set
up, making squashfs go sideways.

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Acked-by: Phillip Lougher <phillip.lougher@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/squashfs/block.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 0cea9b9236d0..82bc942fc437 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -166,6 +166,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
 	}
 
 	if (compressed) {
+		if (!msblk->stream)
+			goto read_failure;
 		length = squashfs_decompress(msblk, bh, b, offset, length,
 			output);
 		if (length < 0)

From 581c2941840f0a37aec39a3e8f4066fd86a8e61b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 2 Aug 2018 08:43:35 -0700
Subject: [PATCH 248/519] squashfs: more metadata hardenings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 71755ee5350b63fb1f283de8561cdb61b47f4d1d upstream.

The squashfs fragment reading code doesn't actually verify that the
fragment is inside the fragment table.  The end result _is_ verified to
be inside the image when actually reading the fragment data, but before
that is done, we may end up taking a page fault because the fragment
table itself might not even exist.

Another report from Anatoly and his endless squashfs image fuzzing.

Reported-by: Анатолий Тросиненко <anatoly.trosinenko@gmail.com>
Acked-by:: Phillip Lougher <phillip.lougher@gmail.com>,
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/squashfs/fragment.c       | 13 +++++++++----
 fs/squashfs/squashfs_fs_sb.h |  1 +
 fs/squashfs/super.c          |  5 +++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 86ad9a4b8c36..0681feab4a84 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
 				u64 *fragment_block)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	int block = SQUASHFS_FRAGMENT_INDEX(fragment);
-	int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
-	u64 start_block = le64_to_cpu(msblk->fragment_index[block]);
+	int block, offset, size;
 	struct squashfs_fragment_entry fragment_entry;
-	int size;
+	u64 start_block;
+
+	if (fragment >= msblk->fragments)
+		return -EIO;
+	block = SQUASHFS_FRAGMENT_INDEX(fragment);
+	offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
+
+	start_block = le64_to_cpu(msblk->fragment_index[block]);
 
 	size = squashfs_read_metadata(sb, &fragment_entry, &start_block,
 					&offset, sizeof(fragment_entry));
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3..ef69c31947bf 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -75,6 +75,7 @@ struct squashfs_sb_info {
 	unsigned short				block_log;
 	long long				bytes_used;
 	unsigned int				inodes;
+	unsigned int				fragments;
 	int					xattr_ids;
 };
 #endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5056babe00df..93aa3e23c845 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -176,6 +176,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	msblk->inode_table = le64_to_cpu(sblk->inode_table_start);
 	msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
 	msblk->inodes = le32_to_cpu(sblk->inodes);
+	msblk->fragments = le32_to_cpu(sblk->fragments);
 	flags = le16_to_cpu(sblk->flags);
 
 	TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b));
@@ -186,7 +187,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	TRACE("Filesystem size %lld bytes\n", msblk->bytes_used);
 	TRACE("Block size %d\n", msblk->block_size);
 	TRACE("Number of inodes %d\n", msblk->inodes);
-	TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments));
+	TRACE("Number of fragments %d\n", msblk->fragments);
 	TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
 	TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
 	TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
@@ -273,7 +274,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_export_op = &squashfs_export_ops;
 
 handle_fragments:
-	fragments = le32_to_cpu(sblk->fragments);
+	fragments = msblk->fragments;
 	if (fragments == 0)
 		goto check_directory_table;
 

From 7dfa787508f9150cd7527e06b9216415dcf941f2 Mon Sep 17 00:00:00 2001
From: Anton Vasilyev <vasilyev@ispras.ru>
Date: Fri, 27 Jul 2018 18:50:42 +0300
Subject: [PATCH 249/519] can: ems_usb: Fix memory leak on ems_usb_disconnect()

commit 72c05f32f4a5055c9c8fe889bb6903ec959c0aad upstream.

ems_usb_probe() allocates memory for dev->tx_msg_buffer, but there
is no its deallocation in ems_usb_disconnect().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Anton Vasilyev <vasilyev@ispras.ru>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/usb/ems_usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 357c9e89fdf9..047348033e27 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -1078,6 +1078,7 @@ static void ems_usb_disconnect(struct usb_interface *intf)
 		usb_free_urb(dev->intr_urb);
 
 		kfree(dev->intr_in_buffer);
+		kfree(dev->tx_msg_buffer);
 	}
 }
 

From d856749a77546f033d9a41cc681ed3a58dba18e9 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Fri, 27 Jul 2018 22:43:01 +0000
Subject: [PATCH 250/519] net: socket: fix potential spectre v1 gadget in
 socketcall

commit c8e8cd579bb4265651df8223730105341e61a2d1 upstream.

'call' is a user-controlled value, so sanitize the array index after the
bounds check to avoid speculating past the bounds of the 'nargs' array.

Found with the help of Smatch:

net/socket.c:2508 __do_sys_socketcall() warn: potential spectre issue
'nargs' [r] (local cap)

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/socket.c b/net/socket.c
index 5b31e5baf3b5..0c544ae48eac 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -89,6 +89,7 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/nospec.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -2324,6 +2325,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 
 	if (call < 1 || call > SYS_SENDMMSG)
 		return -EINVAL;
+	call = array_index_nospec(call, SYS_SENDMMSG + 1);
 
 	len = nargs[call];
 	if (len > sizeof(a))

From a1d7ff2496e04bc1b8c3d8cb1ad717b6256098a8 Mon Sep 17 00:00:00 2001
From: Jiang Biao <jiang.biao2@zte.com.cn>
Date: Wed, 18 Jul 2018 10:29:28 +0800
Subject: [PATCH 251/519] virtio_balloon: fix another race between migration
 and ballooning

commit 89da619bc18d79bca5304724c11d4ba3b67ce2c6 upstream.

Kernel panic when with high memory pressure, calltrace looks like,

PID: 21439 TASK: ffff881be3afedd0 CPU: 16 COMMAND: "java"
 #0 [ffff881ec7ed7630] machine_kexec at ffffffff81059beb
 #1 [ffff881ec7ed7690] __crash_kexec at ffffffff81105942
 #2 [ffff881ec7ed7760] crash_kexec at ffffffff81105a30
 #3 [ffff881ec7ed7778] oops_end at ffffffff816902c8
 #4 [ffff881ec7ed77a0] no_context at ffffffff8167ff46
 #5 [ffff881ec7ed77f0] __bad_area_nosemaphore at ffffffff8167ffdc
 #6 [ffff881ec7ed7838] __node_set at ffffffff81680300
 #7 [ffff881ec7ed7860] __do_page_fault at ffffffff8169320f
 #8 [ffff881ec7ed78c0] do_page_fault at ffffffff816932b5
 #9 [ffff881ec7ed78f0] page_fault at ffffffff8168f4c8
    [exception RIP: _raw_spin_lock_irqsave+47]
    RIP: ffffffff8168edef RSP: ffff881ec7ed79a8 RFLAGS: 00010046
    RAX: 0000000000000246 RBX: ffffea0019740d00 RCX: ffff881ec7ed7fd8
    RDX: 0000000000020000 RSI: 0000000000000016 RDI: 0000000000000008
    RBP: ffff881ec7ed79a8 R8: 0000000000000246 R9: 000000000001a098
    R10: ffff88107ffda000 R11: 0000000000000000 R12: 0000000000000000
    R13: 0000000000000008 R14: ffff881ec7ed7a80 R15: ffff881be3afedd0
    ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018

It happens in the pagefault and results in double pagefault
during compacting pages when memory allocation fails.

Analysed the vmcore, the page leads to second pagefault is corrupted
with _mapcount=-256, but private=0.

It's caused by the race between migration and ballooning, and lock
missing in virtballoon_migratepage() of virtio_balloon driver.
This patch fix the bug.

Fixes: e22504296d4f64f ("virtio_balloon: introduce migration primitives to balloon pages")
Cc: stable@vger.kernel.org
Signed-off-by: Jiang Biao <jiang.biao2@zte.com.cn>
Signed-off-by: Huang Chong <huang.chong@zte.com.cn>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virtio/virtio_balloon.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7cf26768ea0b..cbe9e2295752 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -479,7 +479,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 	tell_host(vb, vb->inflate_vq);
 
 	/* balloon's page migration 2nd step -- deflate "page" */
+	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
 	balloon_page_delete(page);
+	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb, vb->pfns, page);
 	tell_host(vb, vb->deflate_vq);

From 314b46558cc97578f2edf5e77a65140b63db3fcc Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@virtuozzo.com>
Date: Thu, 19 Jul 2018 21:59:07 +0300
Subject: [PATCH 252/519] kvm: x86: vmx: fix vpid leak

commit 63aff65573d73eb8dda4732ad4ef222dd35e4862 upstream.

VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested
vmx is turned on with the module parameter.

However, it's only freed if the L1 guest has executed VMXON which is not
a given.

As a result, on a system with nested==on every creation+deletion of an
L1 vcpu without running an L2 guest results in leaking one vpid.  Since
the total number of vpids is limited to 64k, they can eventually get
exhausted, preventing L2 from starting.

Delay allocation of the L2 vpid until VMXON emulation, thus matching its
freeing.

Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0
Cc: stable@vger.kernel.org
Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 18143886b186..c5a4b1978cbf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6843,6 +6843,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_REL);
 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
+	vmx->nested.vpid02 = allocate_vpid();
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -8887,10 +8889,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto free_vmcs;
 	}
 
-	if (nested) {
+	if (nested)
 		nested_vmx_setup_ctls_msrs(vmx);
-		vmx->nested.vpid02 = allocate_vpid();
-	}
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
@@ -8899,7 +8899,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return &vmx->vcpu;
 
 free_vmcs:
-	free_vpid(vmx->nested.vpid02);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
 	kfree(vmx->guest_msrs);

From 3ef726179c2bbedbf80ab43f8e57f9582865b7b5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 13 Jul 2018 16:12:32 +0800
Subject: [PATCH 253/519] crypto: padlock-aes - Fix Nano workaround data
 corruption

commit 46d8c4b28652d35dc6cfb5adf7f54e102fc04384 upstream.

This was detected by the self-test thanks to Ard's chunking patch.

I finally got around to testing this out on my ancient Via box.  It
turns out that the workaround got the assembly wrong and we end up
doing count + initial cycles of the loop instead of just count.

This obviously causes corruption, either by overwriting the source
that is yet to be processed, or writing over the end of the buffer.

On CPUs that don't require the workaround only ECB is affected.
On Nano CPUs both ECB and CBC are affected.

This patch fixes it by doing the subtraction prior to the assembly.

Fixes: a76c1c23d0c3 ("crypto: padlock-aes - work around Nano CPU...")
Cc: <stable@vger.kernel.org>
Reported-by: Jamie Heilman <jamie@audible.transient.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/padlock-aes.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 97a364694bfc..047ef69b7e65 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 		return;
 	}
 
+	count -= initial;
+
 	if (initial)
 		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 			      : "+S"(input), "+D"(output)
@@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 		      : "+S"(input), "+D"(output)
-		      : "d"(control_word), "b"(key), "c"(count - initial));
+		      : "d"(control_word), "b"(key), "c"(count));
 }
 
 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
@@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 	if (count < cbc_fetch_blocks)
 		return cbc_crypt(input, output, key, iv, control_word, count);
 
+	count -= initial;
+
 	if (initial)
 		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 			      : "+S" (input), "+D" (output), "+a" (iv)
@@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 		      : "+S" (input), "+D" (output), "+a" (iv)
-		      : "d" (control_word), "b" (key), "c" (count-initial));
+		      : "d" (control_word), "b" (key), "c" (count));
 	return iv;
 }
 

From 6ff21107ba21e59a3831825a55e882afc8a1b3f1 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb@cybernetics.com>
Date: Thu, 12 Jul 2018 16:30:45 -0400
Subject: [PATCH 254/519] scsi: sg: fix minor memory leak in error path

commit c170e5a8d222537e98aa8d4fddb667ff7a2ee114 upstream.

Fix a minor memory leak when there is an error opening a /dev/sg device.

Fixes: cc833acbee9d ("sg: O_EXCL and other lock handling")
Cc: <stable@vger.kernel.org>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 4302880a20b3..e1639e80db53 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2195,6 +2195,7 @@ sg_add_sfp(Sg_device * sdp)
 	write_lock_irqsave(&sdp->sfd_lock, iflags);
 	if (atomic_read(&sdp->detaching)) {
 		write_unlock_irqrestore(&sdp->sfd_lock, iflags);
+		kfree(sfp);
 		return ERR_PTR(-ENODEV);
 	}
 	list_add_tail(&sfp->sfd_siblings, &sdp->sfds);

From bffa1e42b3713aa7911cc3f9a6e5a2dbbf1dc789 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 6 Aug 2018 16:24:42 +0200
Subject: [PATCH 255/519] Linux 4.4.146

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index be31491a2d67..030f5af05f4e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 145
+SUBLEVEL = 146
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 760fed99e2bfcf0c4c7b1880ebbe19fdb4d9738a Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Wed, 18 Jul 2018 14:29:54 -0700
Subject: [PATCH 256/519] scsi: qla2xxx: Fix ISP recovery on unload

commit b08abbd9f5996309f021684f9ca74da30dcca36a upstream.

During unload process, the chip can encounter problem where a FW dump would
be captured. For this case, the full reset sequence will be skip to bring
the chip back to full operational state.

Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring")
Cc: <stable@vger.kernel.org>
Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_os.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 5cbf20ab94aa..18b19744398a 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -4938,8 +4938,9 @@ qla2x00_do_dpc(void *data)
 			}
 		}
 
-		if (test_and_clear_bit(ISP_ABORT_NEEDED,
-						&base_vha->dpc_flags)) {
+		if (test_and_clear_bit
+		    (ISP_ABORT_NEEDED, &base_vha->dpc_flags) &&
+		    !test_bit(UNLOADING, &base_vha->dpc_flags)) {
 
 			ql_dbg(ql_dbg_dpc, base_vha, 0x4007,
 			    "ISP abort scheduled.\n");

From 468926f8dbb747b2612459113a926be404c9a6d6 Mon Sep 17 00:00:00 2001
From: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Date: Wed, 18 Jul 2018 14:29:55 -0700
Subject: [PATCH 257/519] scsi: qla2xxx: Return error when TMF returns

commit b4146c4929ef61d5afca011474d59d0918a0cd82 upstream.

Propagate the task management completion status properly to avoid
unnecessary waits for commands to complete.

Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling")
Cc: <stable@vger.kernel.org>
Signed-off-by: Anil Gurumurthy <anil.gurumurthy@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_init.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a9eb3cd453be..41a646696bab 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -325,11 +325,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
 
 	wait_for_completion(&tm_iocb->u.tmf.comp);
 
-	rval = tm_iocb->u.tmf.comp_status == CS_COMPLETE ?
-	    QLA_SUCCESS : QLA_FUNCTION_FAILED;
+	rval = tm_iocb->u.tmf.data;
 
-	if ((rval != QLA_SUCCESS) || tm_iocb->u.tmf.data) {
-		ql_dbg(ql_dbg_taskm, vha, 0x8030,
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x8030,
 		    "TM IOCB failed (%x).\n", rval);
 	}
 

From 09a0de491c5ec0e40f8f9d21b13af306667360f4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Aug 2018 14:44:59 +0200
Subject: [PATCH 258/519] genirq: Make force irq threading setup more robust

commit d1f0301b3333eef5efbfa1fe0f0edbea01863d5d upstream.

The support of force threading interrupts which are set up with both a
primary and a threaded handler wreckaged the setup of regular requested
threaded interrupts (primary handler == NULL).

The reason is that it does not check whether the primary handler is set to
the default handler which wakes the handler thread. Instead it replaces the
thread handler with the primary handler as it would do with force threaded
interrupts which have been requested via request_irq(). So both the primary
and the thread handler become the same which then triggers the warnon that
the thread handler tries to wakeup a not configured secondary thread.

Fortunately this only happens when the driver omits the IRQF_ONESHOT flag
when requesting the threaded interrupt, which is normaly caught by the
sanity checks when force irq threading is disabled.

Fix it by skipping the force threading setup when a regular threaded
interrupt is requested. As a consequence the interrupt request which lacks
the IRQ_ONESHOT flag is rejected correctly instead of silently wreckaging
it.

Fixes: 2a1d3ab8986d ("genirq: Handle force threading of irqs with primary and thread handler")
Reported-by: Kurt Kanzenbach <kurt.kanzenbach@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kurt Kanzenbach <kurt.kanzenbach@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/irq/manage.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5f55a8bf5264..0df2b44dac7c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1012,6 +1012,13 @@ static int irq_setup_forced_threading(struct irqaction *new)
 	if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
 		return 0;
 
+	/*
+	 * No further action required for interrupts which are requested as
+	 * threaded interrupts already
+	 */
+	if (new->handler == irq_default_primary_handler)
+		return 0;
+
 	new->flags |= IRQF_ONESHOT;
 
 	/*
@@ -1019,7 +1026,7 @@ static int irq_setup_forced_threading(struct irqaction *new)
 	 * thread handler. We force thread them as well by creating a
 	 * secondary action.
 	 */
-	if (new->handler != irq_default_primary_handler && new->thread_fn) {
+	if (new->handler && new->thread_fn) {
 		/* Allocate the secondary action */
 		new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
 		if (!new->secondary)

From 0a022859e2304b21a6cd620c2eb008cc60de0ab8 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Tue, 31 Jul 2018 18:13:58 +0200
Subject: [PATCH 259/519] nohz: Fix local_timer_softirq_pending()

commit 80d20d35af1edd632a5e7a3b9c0ab7ceff92769e upstream.

local_timer_softirq_pending() checks whether the timer softirq is
pending with: local_softirq_pending() & TIMER_SOFTIRQ.

This is wrong because TIMER_SOFTIRQ is the softirq number and not a
bitmask. So the test checks for the wrong bit.

Use BIT(TIMER_SOFTIRQ) instead.

Fixes: 5d62c183f9e9 ("nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()")
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Cc: bigeasy@linutronix.de
Cc: peterz@infradead.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180731161358.29472-1-anna-maria@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/tick-sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e5d228f7224c..5ad2e852e9f6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -570,7 +570,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 static inline bool local_timer_softirq_pending(void)
 {
-	return local_softirq_pending() & TIMER_SOFTIRQ;
+	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
 }
 
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,

From 52296ab92b66e2a8c932b2a4cb318dd08cb8bd29 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Fri, 27 Jul 2018 16:54:44 +0100
Subject: [PATCH 260/519] netlink: Do not subscribe to non-existent groups

[ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ]

Make ABI more strict about subscribing to group > ngroups.
Code doesn't check for that and it looks bogus.
(one can subscribe to non-existing group)
Still, it's possible to bind() to all possible groups with (-1)

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9708fff318d5..aed2cfb1a4fe 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -985,6 +985,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			return err;
 	}
+	groups &= (1UL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {

From bc48f46f117e05af226a4c0dff617218f1c13650 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 30 Jul 2018 18:32:36 +0100
Subject: [PATCH 261/519] netlink: Don't shift with UB on nlk->ngroups

[ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ]

On i386 nlk->ngroups might be 32 or 0. Which leads to UB, resulting in
hang during boot.
Check for 0 ngroups and use (unsigned long long) as a type to shift.

Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups").
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index aed2cfb1a4fe..87c25918c073 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -985,7 +985,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			return err;
 	}
-	groups &= (1UL << nlk->ngroups) - 1;
+
+	if (nlk->ngroups == 0)
+		groups = 0;
+	else
+		groups &= (1ULL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {

From a5928d68418768e3e7ed9c75039060c1e70e047e Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Sun, 5 Aug 2018 01:35:53 +0100
Subject: [PATCH 262/519] netlink: Don't shift on 64 for ngroups

commit 91874ecf32e41b5d86a4cb9d60e0bee50d828058 upstream.

It's legal to have 64 groups for netlink_sock.

As user-supplied nladdr->nl_groups is __u32, it's possible to subscribe
only to first 32 groups.

The check for correctness of .bind() userspace supplied parameter
is done by applying mask made from ngroups shift. Which broke Android
as they have 64 groups and the shift for mask resulted in an overflow.

Fixes: 61f4b23769f0 ("netlink: Don't shift with UB on nlk->ngroups")
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: netdev@vger.kernel.org
Cc: stable@vger.kernel.org
Reported-and-Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 87c25918c073..bf292010760a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -988,8 +988,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 
 	if (nlk->ngroups == 0)
 		groups = 0;
-	else
-		groups &= (1ULL << nlk->ngroups) - 1;
+	else if (nlk->ngroups < 8*sizeof(groups))
+		groups &= (1UL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {

From 310eba0dfc8a7d5423516df7f4be7451505ac6ef Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 8 Jul 2018 19:35:02 -0400
Subject: [PATCH 263/519] ext4: fix false negatives *and* false positives in
 ext4_check_descriptors()

commit 44de022c4382541cebdd6de4465d1f4f465ff1dd upstream.

Ext4_check_descriptors() was getting called before s_gdb_count was
initialized.  So for file systems w/o the meta_bg feature, allocation
bitmaps could overlap the block group descriptors and ext4 wouldn't
notice.

For file systems with the meta_bg feature enabled, there was a
fencepost error which would cause the ext4_check_descriptors() to
incorrectly believe that the block allocation bitmap overlaps with the
block group descriptor blocks, and it would reject the mount.

Fix both of these problems.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Gilbert <bgilbert@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 49af3c50b263..3e4d8ac1974e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2102,7 +2102,7 @@ static int ext4_check_descriptors(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
 	ext4_fsblk_t last_block;
-	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
+	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
 	ext4_fsblk_t block_bitmap;
 	ext4_fsblk_t inode_bitmap;
 	ext4_fsblk_t inode_table;
@@ -3777,13 +3777,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			goto failed_mount2;
 		}
 	}
+	sbi->s_gdb_count = db_count;
 	if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
 		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		ret = -EFSCORRUPTED;
 		goto failed_mount2;
 	}
 
-	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 

From fef9866d278ee726b15cf251339660e77ba5488c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 14 Sep 2017 16:50:14 +0200
Subject: [PATCH 264/519] ACPI / PCI: Bail early in acpi_pci_add_bus() if there
 is no ACPI handle

commit a0040c0145945d3bd203df8fa97f6dfa819f3f7d upstream.

Hyper-V instances support PCI pass-through which is implemented through PV
pci-hyperv driver. When a device is passed through, a new root PCI bus is
created in the guest. The bus sits on top of VMBus and has no associated
information in ACPI. acpi_pci_add_bus() in this case proceeds all the way
to acpi_evaluate_dsm(), which reports

  ACPI: \: failed to evaluate _DSM (0x1001)

While acpi_pci_slot_enumerate() and acpiphp_enumerate_slots() are protected
against ACPI_HANDLE() being NULL and do nothing, acpi_evaluate_dsm() is not
and gives us the error. It seems the correct fix is to not do anything in
acpi_pci_add_bus() in such cases.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Sinan Kaya <okaya@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index a32ba753e413..afaf13474796 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -543,7 +543,7 @@ void acpi_pci_add_bus(struct pci_bus *bus)
 	union acpi_object *obj;
 	struct pci_host_bridge *bridge;
 
-	if (acpi_pci_disabled || !bus->bridge)
+	if (acpi_pci_disabled || !bus->bridge || !ACPI_HANDLE(bus->bridge))
 		return;
 
 	acpi_pci_slot_enumerate(bus);

From 731ccd90b8dc6697fefb62f43ed6f8d253d7fd5b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 14 Jul 2018 01:28:15 +0900
Subject: [PATCH 265/519] ring_buffer: tracing: Inherit the tracing setting to
 next ring buffer

commit 73c8d8945505acdcbae137c2e00a1232e0be709f upstream.

Maintain the tracing on/off setting of the ring_buffer when switching
to the trace buffer snapshot.

Taking a snapshot is done by swapping the backup ring buffer
(max_tr_buffer). But since the tracing on/off setting is defined
by the ring buffer, when swapping it, the tracing on/off setting
can also be changed. This causes a strange result like below:

  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 0 > tracing_on
  /sys/kernel/debug/tracing # cat tracing_on
  0
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  0

We don't touch tracing_on, but snapshot changes tracing_on
setting each time. This is an anomaly, because user doesn't know
that each "ring_buffer" stores its own tracing-enable state and
the snapshot is done by swapping ring buffers.

Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Hiraku Toyooka <hiraku.toyooka@cybertrust.co.jp>
Cc: stable@vger.kernel.org
Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
[ Updated commit log and comment in the code ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 16 ++++++++++++++++
 kernel/trace/trace.c        |  6 ++++++
 3 files changed, 23 insertions(+)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 4acc552e9279..19d0778ec382 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -162,6 +162,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_on(struct ring_buffer *buffer);
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d9cd6191760b..fdaa88f38aec 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3141,6 +3141,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer)
 	return !atomic_read(&buffer->record_disabled);
 }
 
+/**
+ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
+ * @buffer: The ring buffer to see if write is set enabled
+ *
+ * Returns true if the ring buffer is set writable by ring_buffer_record_on().
+ * Note that this does NOT mean it is in a writable state.
+ *
+ * It may return true when the ring buffer has been disabled by
+ * ring_buffer_record_disable(), as that is a temporary disabling of
+ * the ring buffer.
+ */
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+{
+	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
+}
+
 /**
  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
  * @buffer: The ring buffer to stop writes to.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8aef4e63ac57..1b980a8ef791 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1088,6 +1088,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	arch_spin_lock(&tr->max_lock);
 
+	/* Inherit the recordable setting from trace_buffer */
+	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
+		ring_buffer_record_on(tr->max_buffer.buffer);
+	else
+		ring_buffer_record_off(tr->max_buffer.buffer);
+
 	buf = tr->trace_buffer.buffer;
 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
 	tr->max_buffer.buffer = buf;

From a8ec97dbac9027f2f4158aadf86010edc2a9ea5d Mon Sep 17 00:00:00 2001
From: Esben Haabendal <eha@deif.com>
Date: Mon, 9 Jul 2018 11:43:01 +0200
Subject: [PATCH 266/519] i2c: imx: Fix reinit_completion() use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9f9e3e0d4dd3338b3f3dde080789f71901e1e4ff upstream.

Make sure to call reinit_completion() before dma is started to avoid race
condition where reinit_completion() is called after complete() and before
wait_for_completion_timeout().

Signed-off-by: Esben Haabendal <eha@deif.com>
Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver")
Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-imx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index d4d853680ae4..a4abf7dc9576 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -382,6 +382,7 @@ static int i2c_imx_dma_xfer(struct imx_i2c_struct *i2c_imx,
 		goto err_desc;
 	}
 
+	reinit_completion(&dma->cmd_complete);
 	txdesc->callback = i2c_imx_dma_callback;
 	txdesc->callback_param = i2c_imx;
 	if (dma_submit_error(dmaengine_submit(txdesc))) {
@@ -631,7 +632,6 @@ static int i2c_imx_dma_write(struct imx_i2c_struct *i2c_imx,
 	 * The first byte must be transmitted by the CPU.
 	 */
 	imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR);
-	reinit_completion(&i2c_imx->dma->cmd_complete);
 	time_left = wait_for_completion_timeout(
 				&i2c_imx->dma->cmd_complete,
 				msecs_to_jiffies(DMA_TIMEOUT));
@@ -690,7 +690,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
 	if (result)
 		return result;
 
-	reinit_completion(&i2c_imx->dma->cmd_complete);
 	time_left = wait_for_completion_timeout(
 				&i2c_imx->dma->cmd_complete,
 				msecs_to_jiffies(DMA_TIMEOUT));

From 0749d5b3ec62310b747751ea7d4d5ccca51bc80f Mon Sep 17 00:00:00 2001
From: Shankara Pailoor <shankarapailoor@gmail.com>
Date: Tue, 5 Jun 2018 08:33:27 -0500
Subject: [PATCH 267/519] jfs: Fix inconsistency between memory allocation and
 ea_buf->max_size

commit 92d34134193e5b129dc24f8d79cb9196626e8d7a upstream.

The code is assuming the buffer is max_size length, but we weren't
allocating enough space for it.

Signed-off-by: Shankara Pailoor <shankarapailoor@gmail.com>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jfs/xattr.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 48b15a6e5558..40a26a542341 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -493,15 +493,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
 	if (size > PSIZE) {
 		/*
 		 * To keep the rest of the code simple.  Allocate a
-		 * contiguous buffer to work with
+		 * contiguous buffer to work with. Make the buffer large
+		 * enough to make use of the whole extent.
 		 */
-		ea_buf->xattr = kmalloc(size, GFP_KERNEL);
+		ea_buf->max_size = (size + sb->s_blocksize - 1) &
+		    ~(sb->s_blocksize - 1);
+
+		ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL);
 		if (ea_buf->xattr == NULL)
 			return -ENOMEM;
 
 		ea_buf->flag = EA_MALLOC;
-		ea_buf->max_size = (size + sb->s_blocksize - 1) &
-		    ~(sb->s_blocksize - 1);
 
 		if (ea_size == 0)
 			return 0;

From 8404ae6c8c9ff23a06cf38112e83002e1088bfe1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 9 Aug 2018 12:19:28 +0200
Subject: [PATCH 268/519] Linux 4.4.147

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 030f5af05f4e..ee92a12e3a4b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 146
+SUBLEVEL = 147
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 7736fcede789b412ae1c5c2f12f9bef58903319c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 28 Jul 2018 08:12:04 -0400
Subject: [PATCH 269/519] ext4: fix check to prevent initializing reserved
 inodes

commit 5012284700775a4e6e3fbe7eac4c543c4874b559 upstream.

Commit 8844618d8aa7: "ext4: only look at the bg_flags field if it is
valid" will complain if block group zero does not have the
EXT4_BG_INODE_ZEROED flag set.  Unfortunately, this is not correct,
since a freshly created file system has this flag cleared.  It gets
almost immediately after the file system is mounted read-write --- but
the following somewhat unlikely sequence will end up triggering a
false positive report of a corrupted file system:

   mkfs.ext4 /dev/vdc
   mount -o ro /dev/vdc /vdc
   mount -o remount,rw /dev/vdc

Instead, when initializing the inode table for block group zero, test
to make sure that itable_unused count is not too large, since that is
the case that will result in some or all of the reserved inodes
getting cleared.

This fixes the failures reported by Eric Whiteney when running
generic/230 and generic/231 in the the nojournal test case.

Fixes: 8844618d8aa7 ("ext4: only look at the bg_flags field if it is valid")
Reported-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ialloc.c | 5 ++++-
 fs/ext4/super.c  | 8 +-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 041117fd8fd7..0963213e9cd3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1308,7 +1308,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			    ext4_itable_unused_count(sb, gdp)),
 			    sbi->s_inodes_per_block);
 
-	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
+	    ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
+			       ext4_itable_unused_count(sb, gdp)) <
+			      EXT4_FIRST_INO(sb)))) {
 		ext4_error(sb, "Something is wrong with group %u: "
 			   "used itable blocks: %d; "
 			   "itable unused count: %u",
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3e4d8ac1974e..8d18f6142da5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2875,14 +2875,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
 		if (!gdp)
 			continue;
 
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
-			continue;
-		if (group != 0)
+		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
 			break;
-		ext4_error(sb, "Inode table for bg 0 marked as "
-			   "needing zeroing");
-		if (sb->s_flags & MS_RDONLY)
-			return ngroups;
 	}
 
 	return group;

From 215f36e128f2b476cd3bfe91339a5e12b79d010c Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Tue, 22 May 2018 14:37:18 -0700
Subject: [PATCH 270/519] tpm: fix race condition in tpm_common_write()

commit 3ab2011ea368ec3433ad49e1b9e1c7b70d2e65df upstream.

There is a race condition in tpm_common_write function allowing
two threads on the same /dev/tpm<N>, or two different applications
on the same /dev/tpmrm<N> to overwrite each other commands/responses.
Fixed this by taking the priv->buffer_mutex early in the function.

Also converted the priv->data_pending from atomic to a regular size_t
type. There is no need for it to be atomic since it is only touched
under the protection of the priv->buffer_mutex.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm-dev.c | 43 ++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index 912ad30be585..4719aa781bf2 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -25,7 +25,7 @@ struct file_priv {
 	struct tpm_chip *chip;
 
 	/* Data passed to and from the tpm via the read/write calls */
-	atomic_t data_pending;
+	size_t data_pending;
 	struct mutex buffer_mutex;
 
 	struct timer_list user_read_timer;      /* user needs to claim result */
@@ -46,7 +46,7 @@ static void timeout_work(struct work_struct *work)
 	struct file_priv *priv = container_of(work, struct file_priv, work);
 
 	mutex_lock(&priv->buffer_mutex);
-	atomic_set(&priv->data_pending, 0);
+	priv->data_pending = 0;
 	memset(priv->data_buffer, 0, sizeof(priv->data_buffer));
 	mutex_unlock(&priv->buffer_mutex);
 }
@@ -72,7 +72,6 @@ static int tpm_open(struct inode *inode, struct file *file)
 	}
 
 	priv->chip = chip;
-	atomic_set(&priv->data_pending, 0);
 	mutex_init(&priv->buffer_mutex);
 	setup_timer(&priv->user_read_timer, user_reader_timeout,
 			(unsigned long)priv);
@@ -86,28 +85,24 @@ static ssize_t tpm_read(struct file *file, char __user *buf,
 			size_t size, loff_t *off)
 {
 	struct file_priv *priv = file->private_data;
-	ssize_t ret_size;
+	ssize_t ret_size = 0;
 	int rc;
 
 	del_singleshot_timer_sync(&priv->user_read_timer);
 	flush_work(&priv->work);
-	ret_size = atomic_read(&priv->data_pending);
-	if (ret_size > 0) {	/* relay data */
-		ssize_t orig_ret_size = ret_size;
-		if (size < ret_size)
-			ret_size = size;
+	mutex_lock(&priv->buffer_mutex);
 
-		mutex_lock(&priv->buffer_mutex);
+	if (priv->data_pending) {
+		ret_size = min_t(ssize_t, size, priv->data_pending);
 		rc = copy_to_user(buf, priv->data_buffer, ret_size);
-		memset(priv->data_buffer, 0, orig_ret_size);
+		memset(priv->data_buffer, 0, priv->data_pending);
 		if (rc)
 			ret_size = -EFAULT;
 
-		mutex_unlock(&priv->buffer_mutex);
+		priv->data_pending = 0;
 	}
 
-	atomic_set(&priv->data_pending, 0);
-
+	mutex_unlock(&priv->buffer_mutex);
 	return ret_size;
 }
 
@@ -118,18 +113,20 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
 	size_t in_size = size;
 	ssize_t out_size;
 
-	/* cannot perform a write until the read has cleared
-	   either via tpm_read or a user_read_timer timeout.
-	   This also prevents splitted buffered writes from blocking here.
-	*/
-	if (atomic_read(&priv->data_pending) != 0)
-		return -EBUSY;
-
 	if (in_size > TPM_BUFSIZE)
 		return -E2BIG;
 
 	mutex_lock(&priv->buffer_mutex);
 
+	/* Cannot perform a write until the read has cleared either via
+	 * tpm_read or a user_read_timer timeout. This also prevents split
+	 * buffered writes from blocking here.
+	 */
+	if (priv->data_pending != 0) {
+		mutex_unlock(&priv->buffer_mutex);
+		return -EBUSY;
+	}
+
 	if (copy_from_user
 	    (priv->data_buffer, (void __user *) buf, in_size)) {
 		mutex_unlock(&priv->buffer_mutex);
@@ -153,7 +150,7 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
 		return out_size;
 	}
 
-	atomic_set(&priv->data_pending, out_size);
+	priv->data_pending = out_size;
 	mutex_unlock(&priv->buffer_mutex);
 
 	/* Set a timeout by which the reader must come claim the result */
@@ -172,7 +169,7 @@ static int tpm_release(struct inode *inode, struct file *file)
 	del_singleshot_timer_sync(&priv->user_read_timer);
 	flush_work(&priv->work);
 	file->private_data = NULL;
-	atomic_set(&priv->data_pending, 0);
+	priv->data_pending = 0;
 	clear_bit(0, &priv->chip->is_open);
 	kfree(priv);
 	return 0;

From e424bee248c38266c6057d43f3e350072fc41c5d Mon Sep 17 00:00:00 2001
From: Thomas Egerer <hakke_007@gmx.de>
Date: Mon, 25 Jan 2016 12:58:44 +0100
Subject: [PATCH 271/519] ipv4+ipv6: Make INET*_ESP select CRYPTO_ECHAINIV

commit 32b6170ca59ccf07d0e394561e54b2cd9726038c upstream.

The ESP algorithms using CBC mode require echainiv. Hence INET*_ESP have
to select CRYPTO_ECHAINIV in order to work properly. This solves the
issues caused by a misconfiguration as described in [1].
The original approach, patching crypto/Kconfig was turned down by
Herbert Xu [2].

[1] https://lists.strongswan.org/pipermail/users/2015-December/009074.html
[2] http://marc.info/?l=linux-crypto-vger&m=145224655809562&w=2

Signed-off-by: Thomas Egerer <hakke_007@gmx.de>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Yongqin Liu <yongqin.liu@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/Kconfig | 1 +
 net/ipv6/Kconfig | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 93581bba8643..09d6c4a6b53d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -354,6 +354,7 @@ config INET_ESP
 	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
+	select CRYPTO_ECHAINIV
 	---help---
 	  Support for IPsec ESP.
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 851d5c9e3ecc..0f50248bad17 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -69,6 +69,7 @@ config INET6_ESP
 	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
+	select CRYPTO_ECHAINIV
 	---help---
 	  Support for IPsec ESP.
 

From 1e4006421429ab672c62ab25afb3c39e6f4aa94f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 20 Apr 2018 14:55:31 -0700
Subject: [PATCH 272/519] fork: unconditionally clear stack on fork

commit e01e80634ecdde1dd113ac43b3adad21b47f3957 upstream.

One of the classes of kernel stack content leaks[1] is exposing the
contents of prior heap or stack contents when a new process stack is
allocated.  Normally, those stacks are not zeroed, and the old contents
remain in place.  In the face of stack content exposure flaws, those
contents can leak to userspace.

Fixing this will make the kernel no longer vulnerable to these flaws, as
the stack will be wiped each time a stack is assigned to a new process.
There's not a meaningful change in runtime performance; it almost looks
like it provides a benefit.

Performing back-to-back kernel builds before:
	Run times: 157.86 157.09 158.90 160.94 160.80
	Mean: 159.12
	Std Dev: 1.54

and after:
	Run times: 159.31 157.34 156.71 158.15 160.81
	Mean: 158.46
	Std Dev: 1.46

Instead of making this a build or runtime config, Andy Lutomirski
recommended this just be enabled by default.

[1] A noisy search for many kinds of stack content leaks can be seen here:
https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=linux+kernel+stack+leak

I did some more with perf and cycle counts on running 100,000 execs of
/bin/true.

before:
Cycles: 218858861551 218853036130 214727610969 227656844122 224980542841
Mean:  221015379122.60
Std Dev: 4662486552.47

after:
Cycles: 213868945060 213119275204 211820169456 224426673259 225489986348
Mean:  217745009865.40
Std Dev: 5935559279.99

It continues to look like it's faster, though the deviation is rather
wide, but I'm not sure what I could do that would be less noisy.  I'm
open to ideas!

Link: http://lkml.kernel.org/r/20180221021659.GA37073@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ Srivatsa: Backported to 4.4.y ]
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Srinidhi Rao <srinidhir@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/thread_info.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b548ed3..646891f3bc1e 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -55,11 +55,7 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_DEBUG_STACK_USAGE
-# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
-#else
-# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
-#endif
+#define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 
 /*
  * flag set/clear/test wrappers

From a9252a70174362912fee1556f8c3a25d66cd7637 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 28 Jul 2018 11:47:17 +0200
Subject: [PATCH 273/519] parisc: Enable CONFIG_MLONGCALLS by default

commit 66509a276c8c1d19ee3f661a41b418d101c57d29 upstream.

Enable the -mlong-calls compiler option by default, because otherwise in most
cases linking the vmlinux binary fails due to truncations of R_PARISC_PCREL22F
relocations. This fixes building the 64-bit defconfig.

Cc: stable@vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 729f89163bc3..210b3d675261 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -177,7 +177,7 @@ config PREFETCH
 
 config MLONGCALLS
 	bool "Enable the -mlong-calls compiler option for big kernels"
-	def_bool y if (!MODULES)
+	default y
 	depends on PA8X00
 	help
 	  If you configure the kernel to include many drivers built-in instead

From 277b161b1a1d339985b4c24e796e86eae9511382 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sun, 5 Aug 2018 13:30:31 -0400
Subject: [PATCH 274/519] parisc: Define mb() and add memory barriers to
 assembler unlock sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit fedb8da96355f5f64353625bf96dc69423ad1826 upstream.

For years I thought all parisc machines executed loads and stores in
order. However, Jeff Law recently indicated on gcc-patches that this is
not correct. There are various degrees of out-of-order execution all the
way back to the PA7xxx processor series (hit-under-miss). The PA8xxx
series has full out-of-order execution for both integer operations, and
loads and stores.

This is described in the following article:
http://web.archive.org/web/20040214092531/http://www.cpus.hp.com/technical_references/advperf.shtml

For this reason, we need to define mb() and to insert a memory barrier
before the store unlocking spinlocks. This ensures that all memory
accesses are complete prior to unlocking. The ldcw instruction performs
the same function on entry.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/include/asm/barrier.h | 32 +++++++++++++++++++++++++++++++
 arch/parisc/kernel/entry.S        |  2 ++
 arch/parisc/kernel/pacache.S      |  1 +
 arch/parisc/kernel/syscall.S      |  4 ++++
 4 files changed, 39 insertions(+)
 create mode 100644 arch/parisc/include/asm/barrier.h

diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
new file mode 100644
index 000000000000..dbaaca84f27f
--- /dev/null
+++ b/arch/parisc/include/asm/barrier.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+/* The synchronize caches instruction executes as a nop on systems in
+   which all memory references are performed in order. */
+#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory")
+
+#if defined(CONFIG_SMP)
+#define mb()		do { synchronize_caches(); } while (0)
+#define rmb()		mb()
+#define wmb()		mb()
+#define dma_rmb()	mb()
+#define dma_wmb()	mb()
+#else
+#define mb()		barrier()
+#define rmb()		barrier()
+#define wmb()		barrier()
+#define dma_rmb()	barrier()
+#define dma_wmb()	barrier()
+#endif
+
+#define __smp_mb()	mb()
+#define __smp_rmb()	mb()
+#define __smp_wmb()	mb()
+
+#include <asm-generic/barrier.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __ASM_BARRIER_H */
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 5dc831955de5..13cb2461fef5 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -481,6 +481,8 @@
 	/* Release pa_tlb_lock lock without reloading lock address. */
 	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
+	or,COND(=)	%r0,\spc,%r0
+	sync
 	or,COND(=)	%r0,\spc,%r0
 	stw             \spc,0(\tmp)
 #endif
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 16073f472118..b3434a7fd3c9 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -354,6 +354,7 @@ ENDPROC(flush_data_cache_local)
 	.macro	tlb_unlock	la,flags,tmp
 #ifdef CONFIG_SMP
 	ldi		1,\tmp
+	sync
 	stw		\tmp,0(\la)
 	mtsm		\flags
 #endif
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 9f22195b90ed..f68eedc72484 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -631,6 +631,7 @@ cas_action:
 	sub,<>	%r28, %r25, %r0
 2:	stw,ma	%r24, 0(%r26)
 	/* Free lock */
+	sync
 	stw,ma	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	/* Clear thread register indicator */
@@ -645,6 +646,7 @@ cas_action:
 3:		
 	/* Error occurred on load or store */
 	/* Free lock */
+	sync
 	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	stw	%r0, 4(%sr2,%r20)
@@ -846,6 +848,7 @@ cas2_action:
 
 cas2_end:
 	/* Free lock */
+	sync
 	stw,ma	%r20, 0(%sr2,%r20)
 	/* Enable interrupts */
 	ssm	PSW_SM_I, %r0
@@ -856,6 +859,7 @@ cas2_end:
 22:
 	/* Error occurred on load or store */
 	/* Free lock */
+	sync
 	stw	%r20, 0(%sr2,%r20)
 	ssm	PSW_SM_I, %r0
 	ldo	1(%r0),%r28

From 6b1f6243b39c4f49d44bafa9e4639be4f124577f Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 9 Aug 2018 16:42:16 +0200
Subject: [PATCH 275/519] xen/netfront: don't cache skb_shinfo()

commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream.

skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache
its return value.

Cc: stable@vger.kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/xen-netfront.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index bec9f099573b..68d0a5c9d437 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -879,7 +879,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 				  struct sk_buff *skb,
 				  struct sk_buff_head *list)
 {
-	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	RING_IDX cons = queue->rx.rsp_cons;
 	struct sk_buff *nskb;
 
@@ -888,15 +887,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 			RING_GET_RESPONSE(&queue->rx, ++cons);
 		skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 
-		if (shinfo->nr_frags == MAX_SKB_FRAGS) {
+		if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
 			unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 
 			BUG_ON(pull_to <= skb_headlen(skb));
 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 		}
-		BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
+		BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 
-		skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				skb_frag_page(nfrag),
 				rx->offset, rx->status, PAGE_SIZE);
 
 		skb_shinfo(nskb)->nr_frags = 0;

From 277131baccf9c96e01d5ffdb0c6447770b634eae Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 26 Apr 2018 14:10:24 +0200
Subject: [PATCH 276/519] ACPI / LPSS: Add missing prv_offset setting for
 byt/cht PWM devices

commit fdcb613d49321b5bf5d5a1bd0fba8e7c241dcc70 upstream.

The LPSS PWM device on on Bay Trail and Cherry Trail devices has a set
of private registers at offset 0x800, the current lpss_device_desc for
them already sets the LPSS_SAVE_CTX flag to have these saved/restored
over device-suspend, but the current lpss_device_desc was not setting
the prv_offset field, leading to the regular device registers getting
saved/restored instead.

This is causing the PWM controller to no longer work, resulting in a black
screen,  after a suspend/resume on systems where the firmware clears the
APB clock and reset bits at offset 0x804.

This commit fixes this by properly setting prv_offset to 0x800 for
the PWM devices.

Cc: stable@vger.kernel.org
Fixes: e1c748179754 ("ACPI / LPSS: Add Intel BayTrail ACPI mode PWM")
Fixes: 1bfbd8eb8a7f ("ACPI / LPSS: Add ACPI IDs for Intel Braswell")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Rafael J . Wysocki <rjw@rjwysocki.net>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpi_lpss.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index f9e0d09f7c66..8a0f77fb5181 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -154,10 +154,12 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = {
 
 static const struct lpss_device_desc byt_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX,
+	.prv_offset = 0x800,
 };
 
 static const struct lpss_device_desc bsw_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX | LPSS_NO_D3_DELAY,
+	.prv_offset = 0x800,
 };
 
 static const struct lpss_device_desc byt_uart_dev_desc = {

From 6aef4c4a1690b0b371d88babc41a8a314d0fd3f9 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:44:42 -0700
Subject: [PATCH 277/519] scsi: sr: Avoid that opening a CD-ROM hangs with
 runtime power management enabled

commit 1214fd7b497400d200e3f4e64e2338b303a20949 upstream.

Surround scsi_execute() calls with scsi_autopm_get_device() and
scsi_autopm_put_device(). Note: removing sr_mutex protection from the
scsi_cd_get() and scsi_cd_put() calls is safe because the purpose of
sr_mutex is to serialize cdrom_*() calls.

This patch avoids that complaints similar to the following appear in the
kernel log if runtime power management is enabled:

INFO: task systemd-udevd:650 blocked for more than 120 seconds.
     Not tainted 4.18.0-rc7-dbg+ #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
systemd-udevd   D28176   650    513 0x00000104
Call Trace:
__schedule+0x444/0xfe0
schedule+0x4e/0xe0
schedule_preempt_disabled+0x18/0x30
__mutex_lock+0x41c/0xc70
mutex_lock_nested+0x1b/0x20
__blkdev_get+0x106/0x970
blkdev_get+0x22c/0x5a0
blkdev_open+0xe9/0x100
do_dentry_open.isra.19+0x33e/0x570
vfs_open+0x7c/0xd0
path_openat+0x6e3/0x1120
do_filp_open+0x11c/0x1c0
do_sys_open+0x208/0x2d0
__x64_sys_openat+0x59/0x70
do_syscall_64+0x77/0x230
entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Maurizio Lombardi <mlombard@redhat.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: <stable@vger.kernel.org>
Tested-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sr.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index de53c9694b68..5dc288fecace 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -520,18 +520,26 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
 	struct scsi_cd *cd;
+	struct scsi_device *sdev;
 	int ret = -ENXIO;
 
+	cd = scsi_cd_get(bdev->bd_disk);
+	if (!cd)
+		goto out;
+
+	sdev = cd->device;
+	scsi_autopm_get_device(sdev);
 	check_disk_change(bdev);
 
 	mutex_lock(&sr_mutex);
-	cd = scsi_cd_get(bdev->bd_disk);
-	if (cd) {
-		ret = cdrom_open(&cd->cdi, bdev, mode);
-		if (ret)
-			scsi_cd_put(cd);
-	}
+	ret = cdrom_open(&cd->cdi, bdev, mode);
 	mutex_unlock(&sr_mutex);
+
+	scsi_autopm_put_device(sdev);
+	if (ret)
+		scsi_cd_put(cd);
+
+out:
 	return ret;
 }
 
@@ -559,6 +567,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	if (ret)
 		goto out;
 
+	scsi_autopm_get_device(sdev);
+
 	/*
 	 * Send SCSI addressing ioctls directly to mid level, send other
 	 * ioctls to cdrom/block level.
@@ -567,15 +577,18 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case SCSI_IOCTL_GET_IDLUN:
 	case SCSI_IOCTL_GET_BUS_NUMBER:
 		ret = scsi_ioctl(sdev, cmd, argp);
-		goto out;
+		goto put;
 	}
 
 	ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
 	if (ret != -ENOSYS)
-		goto out;
+		goto put;
 
 	ret = scsi_ioctl(sdev, cmd, argp);
 
+put:
+	scsi_autopm_put_device(sdev);
+
 out:
 	mutex_unlock(&sr_mutex);
 	return ret;

From ba744147871e7c6d3b6b60eede06f74a1a7abcd9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 6 Aug 2018 09:03:58 -0400
Subject: [PATCH 278/519] root dentries need RCU-delayed freeing

commit 90bad5e05bcdb0308cfa3d3a60f5c0b9c8e2efb3 upstream.

Since mountpoint crossing can happen without leaving lazy mode,
root dentries do need the same protection against having their
memory freed without RCU delay as everything else in the tree.

It's partially hidden by RCU delay between detaching from the
mount tree and dropping the vfsmount reference, but the starting
point of pathwalk can be on an already detached mount, in which
case umount-caused RCU delay has already passed by the time the
lazy pathwalk grabs rcu_read_lock().  If the starting point
happens to be at the root of that vfsmount *and* that vfsmount
covers the entire filesystem, we get trouble.

Fixes: 48a066e72d97 ("RCU'd vsfmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 250c1222e30c..807efaab838e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1954,10 +1954,12 @@ struct dentry *d_make_root(struct inode *root_inode)
 		static const struct qstr name = QSTR_INIT("/", 1);
 
 		res = __d_alloc(root_inode->i_sb, &name);
-		if (res)
+		if (res) {
+			res->d_flags |= DCACHE_RCUACCESS;
 			d_instantiate(res, root_inode);
-		else
+		} else {
 			iput(root_inode);
+		}
 	}
 	return res;
 }

From a3ababd599e72b9b92420c159564684fcbfa489f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 9 Aug 2018 17:21:17 -0400
Subject: [PATCH 279/519] fix mntput/mntput race

commit 9ea0a46ca2c318fcc449c1e6b62a7230a17888f1 upstream.

mntput_no_expire() does the calculation of total refcount under mount_lock;
unfortunately, the decrement (as well as all increments) are done outside
of it, leading to false positives in the "are we dropping the last reference"
test.  Consider the following situation:
	* mnt is a lazy-umounted mount, kept alive by two opened files.  One
of those files gets closed.  Total refcount of mnt is 2.  On CPU 42
mntput(mnt) (called from __fput()) drops one reference, decrementing component
	* After it has looked at component #0, the process on CPU 0 does
mntget(), incrementing component #0, gets preempted and gets to run again -
on CPU 69.  There it does mntput(), which drops the reference (component #69)
and proceeds to spin on mount_lock.
	* On CPU 42 our first mntput() finishes counting.  It observes the
decrement of component #69, but not the increment of component #0.  As the
result, the total it gets is not 1 as it should've been - it's 0.  At which
point we decide that vfsmount needs to be killed and proceed to free it and
shut the filesystem down.  However, there's still another opened file
on that filesystem, with reference to (now freed) vfsmount, etc. and we are
screwed.

It's not a wide race, but it can be reproduced with artificial slowdown of
the mnt_get_count() loop, and it should be easier to hit on SMP KVM setups.

Fix consists of moving the refcount decrement under mount_lock; the tricky
part is that we want (and can) keep the fast case (i.e. mount that still
has non-NULL ->mnt_ns) entirely out of mount_lock.  All places that zero
mnt->mnt_ns are dropping some reference to mnt and they call synchronize_rcu()
before that mntput().  IOW, if mntput() observes (under rcu_read_lock())
a non-NULL ->mnt_ns, it is guaranteed that there is another reference yet to
be dropped.

Reported-by: Jann Horn <jannh@google.com>
Tested-by: Jann Horn <jannh@google.com>
Fixes: 48a066e72d97 ("RCU'd vsfmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index a879560ea144..643b7eecf7b7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1124,12 +1124,22 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 static void mntput_no_expire(struct mount *mnt)
 {
 	rcu_read_lock();
-	mnt_add_count(mnt, -1);
-	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+	if (likely(READ_ONCE(mnt->mnt_ns))) {
+		/*
+		 * Since we don't do lock_mount_hash() here,
+		 * ->mnt_ns can change under us.  However, if it's
+		 * non-NULL, then there's a reference that won't
+		 * be dropped until after an RCU delay done after
+		 * turning ->mnt_ns NULL.  So if we observe it
+		 * non-NULL under rcu_read_lock(), the reference
+		 * we are dropping is not the final one.
+		 */
+		mnt_add_count(mnt, -1);
 		rcu_read_unlock();
 		return;
 	}
 	lock_mount_hash();
+	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
 		rcu_read_unlock();
 		unlock_mount_hash();

From b9341f5aebd89f46d2cda7dd9c39aabc0a559bdb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 9 Aug 2018 17:51:32 -0400
Subject: [PATCH 280/519] fix __legitimize_mnt()/mntput() race

commit 119e1ef80ecfe0d1deb6378d4ab41f5b71519de1 upstream.

__legitimize_mnt() has two problems - one is that in case of success
the check of mount_lock is not ordered wrt preceding increment of
refcount, making it possible to have successful __legitimize_mnt()
on one CPU just before the otherwise final mntpu() on another,
with __legitimize_mnt() not seeing mntput() taking the lock and
mntput() not seeing the increment done by __legitimize_mnt().
Solved by a pair of barriers.

Another is that failure of __legitimize_mnt() on the second
read_seqretry() leaves us with reference that'll need to be
dropped by caller; however, if that races with final mntput()
we can end up with caller dropping rcu_read_lock() and doing
mntput() to release that reference - with the first mntput()
having freed the damn thing just as rcu_read_lock() had been
dropped.  Solution: in "do mntput() yourself" failure case
grab mount_lock, check if MNT_DOOMED has been set by racing
final mntput() that has missed our increment and if it has -
undo the increment and treat that as "failure, caller doesn't
need to drop anything" case.

It's not easy to hit - the final mntput() has to come right
after the first read_seqretry() in __legitimize_mnt() *and*
manage to miss the increment done by __legitimize_mnt() before
the second read_seqretry() in there.  The things that are almost
impossible to hit on bare hardware are not impossible on SMP
KVM, though...

Reported-by: Oleg Nesterov <oleg@redhat.com>
Fixes: 48a066e72d97 ("RCU'd vsfmounts")
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/namespace.c b/fs/namespace.c
index 643b7eecf7b7..b56b50e3da11 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -603,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
 		return 0;
 	mnt = real_mount(bastard);
 	mnt_add_count(mnt, 1);
+	smp_mb();			// see mntput_no_expire()
 	if (likely(!read_seqretry(&mount_lock, seq)))
 		return 0;
 	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
 		mnt_add_count(mnt, -1);
 		return 1;
 	}
+	lock_mount_hash();
+	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+		mnt_add_count(mnt, -1);
+		unlock_mount_hash();
+		return 1;
+	}
+	unlock_mount_hash();
+	/* caller will mntput() */
 	return -1;
 }
 
@@ -1139,6 +1148,11 @@ static void mntput_no_expire(struct mount *mnt)
 		return;
 	}
 	lock_mount_hash();
+	/*
+	 * make sure that if __legitimize_mnt() has not seen us grab
+	 * mount_lock, we'll see their refcount increment here.
+	 */
+	smp_mb();
 	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
 		rcu_read_unlock();

From 01b377d3f0d286d071f46c30586cb261c79559f7 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 23 May 2018 15:30:30 +0300
Subject: [PATCH 281/519] IB/core: Make testing MR flags for writability a
 static inline function

commit 08bb558ac11ab944e0539e78619d7b4c356278bd upstream.

Make the MR writability flags check, which is performed in umem.c,
a static inline function in file ib_verbs.h

This allows the function to be used by low-level infiniband drivers.

Cc: <stable@vger.kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/umem.c | 11 +----------
 include/rdma/ib_verbs.h        | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6790ebb366dd..98fd9a594841 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -122,16 +122,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	umem->address   = addr;
 	umem->page_size = PAGE_SIZE;
 	umem->pid       = get_task_pid(current, PIDTYPE_PID);
-	/*
-	 * We ask for writable memory if any of the following
-	 * access flags are set.  "Local write" and "remote write"
-	 * obviously require write access.  "Remote atomic" can do
-	 * things like fetch and add, which will modify memory, and
-	 * "MW bind" can change permissions by binding a window.
-	 */
-	umem->writable  = !!(access &
-		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
-		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+	umem->writable   = ib_access_writable(access);
 
 	if (access & IB_ACCESS_ON_DEMAND) {
 		put_pid(umem->pid);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 120da1d7f57e..10fefb0dc640 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3007,6 +3007,20 @@ static inline int ib_check_mr_access(int flags)
 	return 0;
 }
 
+static inline bool ib_access_writable(int access_flags)
+{
+	/*
+	 * We have writable memory backing the MR if any of the following
+	 * access flags are set.  "Local write" and "remote write" obviously
+	 * require write access.  "Remote atomic" can do things like fetch and
+	 * add, which will modify memory, and "MW bind" can change permissions
+	 * by binding a window.
+	 */
+	return access_flags &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
+}
+
 /**
  * ib_check_mr_status: lightweight check of MR status.
  *     This routine may provide status checks on a selected

From d803aa2fe665f2dca0e46cefca982ad5c537ca7e Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 23 May 2018 15:30:31 +0300
Subject: [PATCH 282/519] IB/mlx4: Mark user MR as writable if actual virtual
 memory is writable

commit d8f9cc328c8888369880e2527e9186d745f2bbf6 upstream.

To allow rereg_user_mr to modify the MR from read-only to writable without
using get_user_pages again, we needed to define the initial MR as writable.
However, this was originally done unconditionally, without taking into
account the writability of the underlying virtual memory.

As a result, any attempt to register a read-only MR over read-only
virtual memory failed.

To fix this, do not add the writable flag bit when the user virtual memory
is not writable (e.g. const memory).

However, when the underlying memory is NOT writable (and we therefore
do not define the initial MR as writable), the IB core adds a
"force writable" flag to its user-pages request. If this succeeds,
the reg_user_mr caller gets a writable copy of the original pages.

If the user-space caller then does a rereg_user_mr operation to enable
writability, this will succeed. This should not be allowed, since
the original virtual memory was not writable.

Cc: <stable@vger.kernel.org>
Fixes: 9376932d0c26 ("IB/mlx4_ib: Add support for user MR re-registration")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx4/mr.c | 50 +++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index ce87e9cc7eff..bf52e35dd506 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -130,6 +130,40 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 	return err;
 }
 
+static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
+					u64 length, u64 virt_addr,
+					int access_flags)
+{
+	/*
+	 * Force registering the memory as writable if the underlying pages
+	 * are writable.  This is so rereg can change the access permissions
+	 * from readable to writable without having to run through ib_umem_get
+	 * again
+	 */
+	if (!ib_access_writable(access_flags)) {
+		struct vm_area_struct *vma;
+
+		down_read(&current->mm->mmap_sem);
+		/*
+		 * FIXME: Ideally this would iterate over all the vmas that
+		 * cover the memory, but for now it requires a single vma to
+		 * entirely cover the MR to support RO mappings.
+		 */
+		vma = find_vma(current->mm, start);
+		if (vma && vma->vm_end >= start + length &&
+		    vma->vm_start <= start) {
+			if (vma->vm_flags & VM_WRITE)
+				access_flags |= IB_ACCESS_LOCAL_WRITE;
+		} else {
+			access_flags |= IB_ACCESS_LOCAL_WRITE;
+		}
+
+		up_read(&current->mm->mmap_sem);
+	}
+
+	return ib_umem_get(context, start, length, access_flags, 0);
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -144,10 +178,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	/* Force registering the memory as writable. */
-	/* Used for memory re-registeration. HCA protects the access */
-	mr->umem = ib_umem_get(pd->uobject->context, start, length,
-			       access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+	mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
+				    virt_addr, access_flags);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err_free;
@@ -214,6 +246,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 	}
 
 	if (flags & IB_MR_REREG_ACCESS) {
+		if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
+			return -EPERM;
+
 		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
 					       convert_access(mr_access_flags));
 
@@ -227,10 +262,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 
 		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
 		ib_umem_release(mmr->umem);
-		mmr->umem = ib_umem_get(mr->uobject->context, start, length,
-					mr_access_flags |
-					IB_ACCESS_LOCAL_WRITE,
-					0);
+		mmr->umem =
+			mlx4_get_umem_mr(mr->uobject->context, start, length,
+					 virt_addr, mr_access_flags);
 		if (IS_ERR(mmr->umem)) {
 			err = PTR_ERR(mmr->umem);
 			/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */

From 45c679be34ac44ad24bc7abf60193b9f43a83490 Mon Sep 17 00:00:00 2001
From: Michael Mera <dev@michaelmera.com>
Date: Mon, 1 May 2017 15:41:16 +0900
Subject: [PATCH 283/519] IB/ocrdma: fix out of bounds access to local buffer

commit 062d0f22a30c39840ea49b72cfcfc1aa4cc538fa upstream.

In write to debugfs file 'resource_stats' the local buffer 'tmp_str' is
written at index 'count-1' where 'count' is the size of the write, so
potentially 0.

This patch filters odd values for the write size/position to avoid this
type of problem.

Signed-off-by: Michael Mera <dev@michaelmera.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 748b63b86cbc..40242ead096f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -643,7 +643,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
 	struct ocrdma_stats *pstats = filp->private_data;
 	struct ocrdma_dev *dev = pstats->dev;
 
-	if (count > 32)
+	if (*ppos != 0 || count == 0 || count > sizeof(tmp_str))
 		goto err;
 
 	if (copy_from_user(tmp_str, buffer, count))

From 916a57896e00d4f92318c8ff5a7b8ca07e4e95a7 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 15 Jun 2018 09:41:29 +0200
Subject: [PATCH 284/519] ARM: dts: imx6sx: fix irq for pcie bridge

commit 1bcfe0564044be578841744faea1c2f46adc8178 upstream.

Use the correct IRQ line for the MSI controller in the PCIe host
controller. Apparently a different IRQ line is used compared to other
i.MX6 variants. Without this change MSI IRQs aren't properly propagated
to the upstream interrupt controller.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Fixes: b1d17f68e5c5 ("ARM: dts: imx: add initial imx6sx device tree source")
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/imx6sx.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 167f77b3bd43..6963dff815dc 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -1250,7 +1250,7 @@ pcie: pcie@0x08000000 {
 				  /* non-prefetchable memory */
 				  0x82000000 0 0x08000000 0x08000000 0 0x00f00000>;
 			num-lanes = <1>;
-			interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&clks IMX6SX_CLK_PCIE_REF_125M>,
 				 <&clks IMX6SX_CLK_PCIE_AXI>,
 				 <&clks IMX6SX_CLK_LVDS1_OUT>,

From 8dbce8a2e9cfc8e026565d75f7cb950393d04159 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 3 Aug 2018 16:41:39 +0200
Subject: [PATCH 285/519] x86/paravirt: Fix spectre-v2 mitigations for paravirt
 guests

commit 5800dc5c19f34e6e03b5adab1282535cb102fafd upstream.

Nadav reported that on guests we're failing to rewrite the indirect
calls to CALLEE_SAVE paravirt functions. In particular the
pv_queued_spin_unlock() call is left unpatched and that is all over the
place. This obviously wrecks Spectre-v2 mitigation (for paravirt
guests) which relies on not actually having indirect calls around.

The reason is an incorrect clobber test in paravirt_patch_call(); this
function rewrites an indirect call with a direct call to the _SAME_
function, there is no possible way the clobbers can be different
because of this.

Therefore remove this clobber check. Also put WARNs on the other patch
failure case (not enough room for the instruction) which I've not seen
trigger in my (limited) testing.

Three live kernel image disassemblies for lock_sock_nested (as a small
function that illustrates the problem nicely). PRE is the current
situation for guests, POST is with this patch applied and NATIVE is with
or without the patch for !guests.

PRE:

(gdb) disassemble lock_sock_nested
Dump of assembler code for function lock_sock_nested:
   0xffffffff817be970 <+0>:     push   %rbp
   0xffffffff817be971 <+1>:     mov    %rdi,%rbp
   0xffffffff817be974 <+4>:     push   %rbx
   0xffffffff817be975 <+5>:     lea    0x88(%rbp),%rbx
   0xffffffff817be97c <+12>:    callq  0xffffffff819f7160 <_cond_resched>
   0xffffffff817be981 <+17>:    mov    %rbx,%rdi
   0xffffffff817be984 <+20>:    callq  0xffffffff819fbb00 <_raw_spin_lock_bh>
   0xffffffff817be989 <+25>:    mov    0x8c(%rbp),%eax
   0xffffffff817be98f <+31>:    test   %eax,%eax
   0xffffffff817be991 <+33>:    jne    0xffffffff817be9ba <lock_sock_nested+74>
   0xffffffff817be993 <+35>:    movl   $0x1,0x8c(%rbp)
   0xffffffff817be99d <+45>:    mov    %rbx,%rdi
   0xffffffff817be9a0 <+48>:    callq  *0xffffffff822299e8
   0xffffffff817be9a7 <+55>:    pop    %rbx
   0xffffffff817be9a8 <+56>:    pop    %rbp
   0xffffffff817be9a9 <+57>:    mov    $0x200,%esi
   0xffffffff817be9ae <+62>:    mov    $0xffffffff817be993,%rdi
   0xffffffff817be9b5 <+69>:    jmpq   0xffffffff81063ae0 <__local_bh_enable_ip>
   0xffffffff817be9ba <+74>:    mov    %rbp,%rdi
   0xffffffff817be9bd <+77>:    callq  0xffffffff817be8c0 <__lock_sock>
   0xffffffff817be9c2 <+82>:    jmp    0xffffffff817be993 <lock_sock_nested+35>
End of assembler dump.

POST:

(gdb) disassemble lock_sock_nested
Dump of assembler code for function lock_sock_nested:
   0xffffffff817be970 <+0>:     push   %rbp
   0xffffffff817be971 <+1>:     mov    %rdi,%rbp
   0xffffffff817be974 <+4>:     push   %rbx
   0xffffffff817be975 <+5>:     lea    0x88(%rbp),%rbx
   0xffffffff817be97c <+12>:    callq  0xffffffff819f7160 <_cond_resched>
   0xffffffff817be981 <+17>:    mov    %rbx,%rdi
   0xffffffff817be984 <+20>:    callq  0xffffffff819fbb00 <_raw_spin_lock_bh>
   0xffffffff817be989 <+25>:    mov    0x8c(%rbp),%eax
   0xffffffff817be98f <+31>:    test   %eax,%eax
   0xffffffff817be991 <+33>:    jne    0xffffffff817be9ba <lock_sock_nested+74>
   0xffffffff817be993 <+35>:    movl   $0x1,0x8c(%rbp)
   0xffffffff817be99d <+45>:    mov    %rbx,%rdi
   0xffffffff817be9a0 <+48>:    callq  0xffffffff810a0c20 <__raw_callee_save___pv_queued_spin_unlock>
   0xffffffff817be9a5 <+53>:    xchg   %ax,%ax
   0xffffffff817be9a7 <+55>:    pop    %rbx
   0xffffffff817be9a8 <+56>:    pop    %rbp
   0xffffffff817be9a9 <+57>:    mov    $0x200,%esi
   0xffffffff817be9ae <+62>:    mov    $0xffffffff817be993,%rdi
   0xffffffff817be9b5 <+69>:    jmpq   0xffffffff81063aa0 <__local_bh_enable_ip>
   0xffffffff817be9ba <+74>:    mov    %rbp,%rdi
   0xffffffff817be9bd <+77>:    callq  0xffffffff817be8c0 <__lock_sock>
   0xffffffff817be9c2 <+82>:    jmp    0xffffffff817be993 <lock_sock_nested+35>
End of assembler dump.

NATIVE:

(gdb) disassemble lock_sock_nested
Dump of assembler code for function lock_sock_nested:
   0xffffffff817be970 <+0>:     push   %rbp
   0xffffffff817be971 <+1>:     mov    %rdi,%rbp
   0xffffffff817be974 <+4>:     push   %rbx
   0xffffffff817be975 <+5>:     lea    0x88(%rbp),%rbx
   0xffffffff817be97c <+12>:    callq  0xffffffff819f7160 <_cond_resched>
   0xffffffff817be981 <+17>:    mov    %rbx,%rdi
   0xffffffff817be984 <+20>:    callq  0xffffffff819fbb00 <_raw_spin_lock_bh>
   0xffffffff817be989 <+25>:    mov    0x8c(%rbp),%eax
   0xffffffff817be98f <+31>:    test   %eax,%eax
   0xffffffff817be991 <+33>:    jne    0xffffffff817be9ba <lock_sock_nested+74>
   0xffffffff817be993 <+35>:    movl   $0x1,0x8c(%rbp)
   0xffffffff817be99d <+45>:    mov    %rbx,%rdi
   0xffffffff817be9a0 <+48>:    movb   $0x0,(%rdi)
   0xffffffff817be9a3 <+51>:    nopl   0x0(%rax)
   0xffffffff817be9a7 <+55>:    pop    %rbx
   0xffffffff817be9a8 <+56>:    pop    %rbp
   0xffffffff817be9a9 <+57>:    mov    $0x200,%esi
   0xffffffff817be9ae <+62>:    mov    $0xffffffff817be993,%rdi
   0xffffffff817be9b5 <+69>:    jmpq   0xffffffff81063ae0 <__local_bh_enable_ip>
   0xffffffff817be9ba <+74>:    mov    %rbp,%rdi
   0xffffffff817be9bd <+77>:    callq  0xffffffff817be8c0 <__lock_sock>
   0xffffffff817be9c2 <+82>:    jmp    0xffffffff817be993 <lock_sock_nested+35>
End of assembler dump.


Fixes: 63f70270ccd9 ("[PATCH] i386: PARAVIRT: add common patching machinery")
Fixes: 3010a0663fd9 ("x86/paravirt, objtool: Annotate indirect calls")
Reported-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/paravirt.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f534a0e3af53..632195b41688 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -97,10 +97,12 @@ unsigned paravirt_patch_call(void *insnbuf,
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
 
-	if (tgt_clobbers & ~site_clobbers)
-		return len;	/* target would clobber too much for this site */
-	if (len < 5)
+	if (len < 5) {
+#ifdef CONFIG_RETPOLINE
+		WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+#endif
 		return len;	/* call too long for patch site */
+	}
 
 	b->opcode = 0xe8; /* call */
 	b->delta = delta;
@@ -115,8 +117,12 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
 	struct branch *b = insnbuf;
 	unsigned long delta = (unsigned long)target - (addr+5);
 
-	if (len < 5)
+	if (len < 5) {
+#ifdef CONFIG_RETPOLINE
+		WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+#endif
 		return len;	/* call too long for patch site */
+	}
 
 	b->opcode = 0xe9;	/* jmp */
 	b->delta = delta;

From 7744abbe29a59db367f59b0c9890356732f25a3b Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 26 Jul 2018 13:14:55 +0200
Subject: [PATCH 286/519] x86/speculation: Protect against userspace-userspace
 spectreRSB

commit fdf82a7856b32d905c39afc85e34364491e46346 upstream.

The article "Spectre Returns! Speculation Attacks using the Return Stack
Buffer" [1] describes two new (sub-)variants of spectrev2-like attacks,
making use solely of the RSB contents even on CPUs that don't fallback to
BTB on RSB underflow (Skylake+).

Mitigate userspace-userspace attacks by always unconditionally filling RSB on
context switch when the generic spectrev2 mitigation has been enabled.

[1] https://arxiv.org/pdf/1807.07940.pdf

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1807261308190.997@cbobk.fhfr.pm
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 38 +++++++-------------------------------
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 12a8867071f3..7688ce0b26c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -309,23 +309,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return cmd;
 }
 
-/* Check for Skylake-like CPUs (for RSB handling) */
-static bool __init is_skylake_era(void)
-{
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_data.x86 == 6) {
-		switch (boot_cpu_data.x86_model) {
-		case INTEL_FAM6_SKYLAKE_MOBILE:
-		case INTEL_FAM6_SKYLAKE_DESKTOP:
-		case INTEL_FAM6_SKYLAKE_X:
-		case INTEL_FAM6_KABYLAKE_MOBILE:
-		case INTEL_FAM6_KABYLAKE_DESKTOP:
-			return true;
-		}
-	}
-	return false;
-}
-
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -386,22 +369,15 @@ static void __init spectre_v2_select_mitigation(void)
 	pr_info("%s\n", spectre_v2_strings[mode]);
 
 	/*
-	 * If neither SMEP nor PTI are available, there is a risk of
-	 * hitting userspace addresses in the RSB after a context switch
-	 * from a shallow call stack to a deeper one. To prevent this fill
-	 * the entire RSB, even when using IBRS.
+	 * If spectre v2 protection has been enabled, unconditionally fill
+	 * RSB during a context switch; this protects against two independent
+	 * issues:
 	 *
-	 * Skylake era CPUs have a separate issue with *underflow* of the
-	 * RSB, when they will predict 'ret' targets from the generic BTB.
-	 * The proper mitigation for this is IBRS. If IBRS is not supported
-	 * or deactivated in favour of retpolines the RSB fill on context
-	 * switch is required.
+	 *	- RSB underflow (and switch to BTB) on Skylake+
+	 *	- SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
 	 */
-	if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
-	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
-		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-		pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
-	}
+	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
 	/* Initialize Indirect Branch Prediction Barrier if supported */
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {

From 866234c373a0f34774d5dcb3886a6c982397bbc9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 28 Apr 2018 21:37:03 +0900
Subject: [PATCH 287/519] kprobes/x86: Fix %p uses in error messages

commit 0ea063306eecf300fcf06d2f5917474b580f666f upstream.

Remove all %p uses in error messages in kprobes/x86.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Howells <dhowells@redhat.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jon Medhurst <tixy@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Tobin C . Harding <me@tobin.cc>
Cc: Will Deacon <will.deacon@arm.com>
Cc: acme@kernel.org
Cc: akpm@linux-foundation.org
Cc: brueckner@linux.vnet.ibm.com
Cc: linux-arch@vger.kernel.org
Cc: rostedt@goodmis.org
Cc: schwidefsky@de.ibm.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/lkml/152491902310.9916.13355297638917767319.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/kprobes/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1f5c47a49e35..c6f466d6cc57 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -393,7 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src)
 		newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
 		if ((s64) (s32) newdisp != newdisp) {
 			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
-			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
 			return 0;
 		}
 		disp = (u8 *) dest + insn_offset_displacement(&insn);
@@ -609,8 +608,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 		 * Raise a BUG or we'll continue in an endless reentering loop
 		 * and eventually a stack overflow.
 		 */
-		printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
-		       p->addr);
+		pr_err("Unrecoverable kprobe detected.\n");
 		dump_kprobe(p);
 		BUG();
 	default:

From ec5aa64fec7206537442a2f3cb67decabad252f4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 3 Aug 2018 10:05:50 -0700
Subject: [PATCH 288/519] x86/irqflags: Provide a declaration for
 native_save_fl

commit 208cbb32558907f68b3b2a081ca2337ac3744794 upstream.

It was reported that the commit d0a8d9378d16 is causing users of gcc < 4.9
to observe -Werror=missing-prototypes errors.

Indeed, it seems that:
extern inline unsigned long native_save_fl(void) { return 0; }

compiled with -Werror=missing-prototypes produces this warning in gcc <
4.9, but not gcc >= 4.9.

Fixes: d0a8d9378d16 ("x86/paravirt: Make native_save_fl() extern inline").
Reported-by: David Laight <david.laight@aculab.com>
Reported-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: hpa@zytor.com
Cc: jgross@suse.com
Cc: kstewart@linuxfoundation.org
Cc: gregkh@linuxfoundation.org
Cc: boris.ostrovsky@oracle.com
Cc: astrachan@google.com
Cc: mka@chromium.org
Cc: arnd@arndb.de
Cc: tstellar@redhat.com
Cc: sedat.dilek@gmail.com
Cc: David.Laight@aculab.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180803170550.164688-1-ndesaulniers@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/irqflags.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0056bc945cd1..cb7f04981c6b 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -8,6 +8,8 @@
  * Interrupt control:
  */
 
+/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */
+extern inline unsigned long native_save_fl(void);
 extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;

From 90a231c63cc28d896ab353b027011a949e9884d3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:21 -0700
Subject: [PATCH 289/519] x86/speculation/l1tf: Increase 32bit PAE
 __PHYSICAL_PAGE_SHIFT

commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream

L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU
speculates on PTE entries which do not have the PRESENT bit set, if the
content of the resulting physical address is available in the L1D cache.

The OS side mitigation makes sure that a !PRESENT PTE entry points to a
physical address outside the actually existing and cachable memory
space. This is achieved by inverting the upper bits of the PTE. Due to the
address space limitations this only works for 64bit and 32bit PAE kernels,
but not for 32bit non PAE.

This mitigation applies to both host and guest kernels, but in case of a
64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of
the PAE address space (44bit) is not enough if the host has more than 43
bits of populated memory address space, because the speculation treats the
PTE content as a physical host address bypassing EPT.

The host (hypervisor) protects itself against the guest by flushing L1D as
needed, but pages inside the guest are not protected against attacks from
other processes inside the same guest.

For the guest the inverted PTE mask has to match the host to provide the
full protection for all pages the host could possibly map into the
guest. The hosts populated address space is not known to the guest, so the
mask must cover the possible maximal host address space, i.e. 52 bit.

On 32bit PAE the maximum PTE mask is currently set to 44 bit because that
is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits
the mask to be below what the host could possible use for physical pages.

The L1TF PROT_NONE protection code uses the PTE masks to determine which
bits to invert to make sure the higher bits are set for unmapped entries to
prevent L1TF speculation attacks against EPT inside guests.

In order to invert all bits that could be used by the host, increase
__PHYSICAL_PAGE_SHIFT to 52 to match 64bit.

The real limit for a 32bit PAE kernel is still 44 bits because all Linux
PTEs are created from unsigned long PFNs, so they cannot be higher than 44
bits on a 32bit kernel. So these extra PFN bits should be never set. The
only users of this macro are using it to look at PTEs, so it's safe.

[ tglx: Massaged changelog ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/page_32_types.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 3a52ee0e726d..bfceb5cc6347 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -27,8 +27,13 @@
 #define N_EXCEPTION_STACKS 1
 
 #ifdef CONFIG_X86_PAE
-/* 44=32+12, the limit we can fit into an unsigned long pfn */
-#define __PHYSICAL_MASK_SHIFT	44
+/*
+ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
+ * but we need the full mask to make sure inverted PROT_NONE
+ * entries have all the host bits set in a guest.
+ * The real limit is still 44 bits.
+ */
+#define __PHYSICAL_MASK_SHIFT	52
 #define __VIRTUAL_MASK_SHIFT	32
 
 #else  /* !CONFIG_X86_PAE */

From 0a5deacaac102f451bf8c1fb9d007047fcd712f6 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 7 Jul 2016 17:19:11 -0700
Subject: [PATCH 290/519] x86/mm: Move swap offset/type up in PTE to work
 around erratum

commit 00839ee3b299303c6a5e26a0a2485427a3afcbbf upstream

This erratum can result in Accessed/Dirty getting set by the hardware
when we do not expect them to be (on !Present PTEs).

Instead of trying to fix them up after this happens, we just
allow the bits to get set and try to ignore them.  We do this by
shifting the layout of the bits we use for swap offset/type in
our 64-bit PTEs.

It looks like this:

 bitnrs: |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0|
 names:  |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P|
 before: |         OFFSET (9-63)          |0|X|X| TYPE(1-5) |0|
  after: | OFFSET (14-63)  |  TYPE (9-13) |0|X|X|X| X| X|X|X|0|

Note that D was already a don't care (X) even before.  We just
move TYPE up and turn its old spot (which could be hit by the
A bit) into all don't cares.

We take 5 bits away from the offset, but that still leaves us
with 50 bits which lets us index into a 62-bit swapfile (4 EiB).
I think that's probably fine for the moment.  We could
theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it
doesn't gain us anything.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: dave.hansen@intel.com
Cc: linux-mm@kvack.org
Cc: mhocko@suse.com
Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index c810226e741a..225405b690b8 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -163,18 +163,32 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
-/* Encode and de-code a swap entry */
+/*
+ * Encode and de-code a swap entry
+ *
+ * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
+ * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
+ * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes.  We need to start storing swap entries above
+ * there.  We also need to avoid using A and D because of an
+ * erratum where they can be incorrectly set by hardware on
+ * non-present PTEs.
+ */
+#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+/* Place the offset above the type: */
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
-#define __swp_type(x)			(((x).val >> (_PAGE_BIT_PRESENT + 1)) \
+#define __swp_type(x)			(((x).val >> (SWP_TYPE_FIRST_BIT)) \
 					 & ((1U << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)			((x).val >> SWP_OFFSET_SHIFT)
+#define __swp_offset(x)			((x).val >> SWP_OFFSET_FIRST_BIT)
 #define __swp_entry(type, offset)	((swp_entry_t) { \
-					 ((type) << (_PAGE_BIT_PRESENT + 1)) \
-					 | ((offset) << SWP_OFFSET_SHIFT) })
+					 ((type) << (SWP_TYPE_FIRST_BIT)) \
+					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 

From f487cf69cf1456ceb34857a50474373aae42dd8a Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 10 Aug 2016 10:23:25 -0700
Subject: [PATCH 291/519] x86/mm: Fix swap entry comment and macro

commit ace7fab7a6cdd363a615ec537f2aa94dbc761ee2 upstream

A recent patch changed the format of a swap PTE.

The comment explaining the format of the swap PTE is wrong about
the bits used for the swap type field.  Amusingly, the ASCII art
and the patch description are correct, but the comment itself
is wrong.

As I was looking at this, I also noticed that the
SWP_OFFSET_FIRST_BIT has an off-by-one error.  This does not
really hurt anything.  It just wasted a bit of space in the PTE,
giving us 2^59 bytes of addressable space in our swapfiles
instead of 2^60.  But, it doesn't match with the comments, and it
wastes a bit of space, so fix it.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Fixes: 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work around erratum")
Link: http://lkml.kernel.org/r/20160810172325.E56AD7DA@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 225405b690b8..ce97c8c6a310 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -179,7 +179,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
 /* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 

From 86b0948d7c546feb01cd2d7ac2bfb15476e6e974 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Fri, 8 Sep 2017 16:10:46 -0700
Subject: [PATCH 292/519] mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit
 1

commit eee4818baac0f2b37848fdf90e4b16430dc536ac upstream

_PAGE_PSE is used to distinguish between a truly non-present
(_PAGE_PRESENT=0) PMD, and a PMD which is undergoing a THP split and
should be treated as present.

But _PAGE_SWP_SOFT_DIRTY currently uses the _PAGE_PSE bit, which would
cause confusion between one of those PMDs undergoing a THP split, and a
soft-dirty PMD.  Dropping _PAGE_PSE check in pmd_present() does not work
well, because it can hurt optimization of tlb handling in thp split.

Thus, we need to move the bit.

In the current kernel, bits 1-4 are not used in non-present format since
commit 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work
around erratum").  So let's move _PAGE_SWP_SOFT_DIRTY to bit 1.  Bit 7
is used as reserved (always clear), so please don't use it for other
purpose.

[dwmw2: Pulled in to 4.9 backport to support L1TF changes]

Link: http://lkml.kernel.org/r/20170717193955.20207-3-zi.yan@sent.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h    | 12 +++++++++---
 arch/x86/include/asm/pgtable_types.h | 10 +++++-----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce97c8c6a310..008e1a58f96c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -166,15 +166,21 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 /*
  * Encode and de-code a swap entry
  *
- * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
- * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
+ * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+ * | OFFSET (14->63) | TYPE (9-13)  |0|0|X|X| X| X|X|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
  * there.  We also need to avoid using A and D because of an
  * erratum where they can be incorrectly set by hardware on
  * non-present PTEs.
+ *
+ * SD (1) in swp entry is used to store soft dirty bit, which helps us
+ * remember soft dirty over page migration
+ *
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
  */
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 8dba273da25a..7572ce32055e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -70,15 +70,15 @@
 /*
  * Tracking soft dirty bit when a page goes to a swap is tricky.
  * We need a bit which can be stored in pte _and_ not conflict
- * with swap entry format. On x86 bits 6 and 7 are *not* involved
- * into swap entry computation, but bit 6 is used for nonlinear
- * file mapping, so we borrow bit 7 for soft dirty tracking.
+ * with swap entry format. On x86 bits 1-4 are *not* involved
+ * into swap entry computation, but bit 7 is used for thp migration,
+ * so we borrow bit 1 for soft dirty tracking.
  *
  * Please note that this bit must be treated as swap dirty page
- * mark if and only if the PTE has present bit clear!
+ * mark if and only if the PTE/PMD has present bit clear!
  */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
+#define _PAGE_SWP_SOFT_DIRTY	_PAGE_RW
 #else
 #define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif

From 614f5e84640e382b9916b6f606328191ed0264b3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 13 Jun 2018 15:48:22 -0700
Subject: [PATCH 293/519] x86/speculation/l1tf: Change order of offset/type in
 swap entry

commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream

If pages are swapped out, the swap entry is stored in the corresponding
PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate
on PTE entries which have the present bit set and would treat the swap
entry as phsyical address (PFN). To mitigate that the upper bits of the PTE
must be set so the PTE points to non existent memory.

The swap entry stores the type and the offset of a swapped out page in the
PTE. type is stored in bit 9-13 and offset in bit 14-63. The hardware
ignores the bits beyond the phsyical address space limit, so to make the
mitigation effective its required to start 'offset' at the lowest possible
bit so that even large swap offsets do not reach into the physical address
space limit bits.

Move offset to bit 9-58 and type to bit 59-63 which are the bits that
hardware generally doesn't care about.

That, in turn, means that if you on desktop chip with only 40 bits of
physical addressing, now that the offset starts at bit 9, there needs to be
30 bits of offset actually *in use* until bit 39 ends up being set, which
means when inverted it will again point into existing memory.

So that's 4 terabyte of swap space (because the offset is counted in pages,
so 30 bits of offset is 42 bits of actual coverage). With bigger physical
addressing, that obviously grows further, until the limit of the offset is
hit (at 50 bits of offset - 62 bits of actual swap file coverage).

This is a preparatory change for the actual swap entry inversion to protect
against L1TF.

[ AK: Updated description and minor tweaks. Split into two parts ]
[ tglx: Massaged changelog ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 008e1a58f96c..a72c2ab24006 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | OFFSET (14->63) | TYPE (9-13)  |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) |  OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -182,19 +182,28 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
  * but also L and G.
  */
-#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
-#define SWP_TYPE_BITS 5
-/* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
+#define SWP_TYPE_BITS		5
+
+#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
+
+/* We always extract/encode the offset by shifting it all the way up, and then down again */
+#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
-#define __swp_type(x)			(((x).val >> (SWP_TYPE_FIRST_BIT)) \
-					 & ((1U << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)			((x).val >> SWP_OFFSET_FIRST_BIT)
-#define __swp_entry(type, offset)	((swp_entry_t) { \
-					 ((type) << (SWP_TYPE_FIRST_BIT)) \
-					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
+/* Extract the high bits for type */
+#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
+
+/* Shift up (to get rid of type), then down to get value */
+#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+
+/*
+ * Shift the offset up "too far" by TYPE bits, then down again
+ */
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+	((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
+
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 

From 9bbdab847fc9a0b8cf23fa7354e1210f0b492821 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 13 Jun 2018 15:48:23 -0700
Subject: [PATCH 294/519] x86/speculation/l1tf: Protect swap entries against
 L1TF

commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream

With L1 terminal fault the CPU speculates into unmapped PTEs, and resulting
side effects allow to read the memory the PTE is pointing too, if its
values are still in the L1 cache.

For swapped out pages Linux uses unmapped PTEs and stores a swap entry into
them.

To protect against L1TF it must be ensured that the swap entry is not
pointing to valid memory, which requires setting higher bits (between bit
36 and bit 45) that are inside the CPUs physical address space, but outside
any real memory.

To do this invert the offset to make sure the higher bits are always set,
as long as the swap file is not too big.

Note there is no workaround for 32bit !PAE, or on systems which have more
than MAX_PA/2 worth of memory. The later case is very unlikely to happen on
real systems.

[AK: updated description and minor tweaks by. Split out from the original
     patch ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable_64.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index a72c2ab24006..67f2fe43a593 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,7 +168,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | TYPE (59-63) |  OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -181,6 +181,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
  * but also L and G.
+ *
+ * The offset is inverted by a binary not operation to make the high
+ * physical bits set.
  */
 #define SWP_TYPE_BITS		5
 
@@ -195,13 +198,15 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
 
 /* Shift up (to get rid of type), then down to get value */
-#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
 
 /*
  * Shift the offset up "too far" by TYPE bits, then down again
+ * The offset is inverted by a binary not operation to make the high
+ * physical bits set.
  */
 #define __swp_entry(type, offset) ((swp_entry_t) { \
-	((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+	(~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
 	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
 
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })

From 9ee2d2da676c48a459a99f10f45c71ffca8761a8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:24 -0700
Subject: [PATCH 295/519] x86/speculation/l1tf: Protect PROT_NONE PTEs against
 speculation

commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream

When PTEs are set to PROT_NONE the kernel just clears the Present bit and
preserves the PFN, which creates attack surface for L1TF speculation
speculation attacks.

This is important inside guests, because L1TF speculation bypasses physical
page remapping. While the host has its own migitations preventing leaking
data from other VMs into the guest, this would still risk leaking the wrong
page inside the current guest.

This uses the same technique as Linus' swap entry patch: while an entry is
is in PROTNONE state invert the complete PFN part part of it. This ensures
that the the highest bit will point to non existing memory.

The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE and
pte/pmd/pud_pfn undo it.

This assume that no code path touches the PFN part of a PTE directly
without using these primitives.

This doesn't handle the case that MMIO is on the top of the CPU physical
memory. If such an MMIO region was exposed by an unpriviledged driver for
mmap it would be possible to attack some real memory.  However this
situation is all rather unlikely.

For 32bit non PAE the inversion is not done because there are really not
enough bits to protect anything.

Q: Why does the guest need to be protected when the HyperVisor already has
   L1TF mitigations?

A: Here's an example:

   Physical pages 1 2 get mapped into a guest as
   GPA 1 -> PA 2
   GPA 2 -> PA 1
   through EPT.

   The L1TF speculation ignores the EPT remapping.

   Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and
   they belong to different users and should be isolated.

   A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and
   gets read access to the underlying physical page. Which in this case
   points to PA 2, so it can read process B's data, if it happened to be in
   L1, so isolation inside the guest is broken.

   There's nothing the hypervisor can do about this. This mitigation has to
   be done in the guest itself.

[ tglx: Massaged changelog ]
[ dwmw2: backported to 4.9 ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-2level.h | 17 ++++++++++++
 arch/x86/include/asm/pgtable-3level.h |  2 ++
 arch/x86/include/asm/pgtable-invert.h | 32 ++++++++++++++++++++++
 arch/x86/include/asm/pgtable.h        | 38 +++++++++++++++++++--------
 arch/x86/include/asm/pgtable_64.h     |  2 ++
 5 files changed, 80 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/include/asm/pgtable-invert.h

diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index fd74a11959de..89c50332a71e 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
+/* No inverted PFNs on 2 level page tables */
+
+static inline u64 protnone_mask(u64 val)
+{
+	return 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+	return val;
+}
+
+static inline bool __pte_needs_invert(u64 val)
+{
+	return false;
+}
+
 #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cdaa58c9b39e..0c89891c7b44 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -184,4 +184,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
+#include <asm/pgtable-invert.h>
+
 #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
new file mode 100644
index 000000000000..177564187fc0
--- /dev/null
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PGTABLE_INVERT_H
+#define _ASM_PGTABLE_INVERT_H 1
+
+#ifndef __ASSEMBLY__
+
+static inline bool __pte_needs_invert(u64 val)
+{
+	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
+}
+
+/* Get a mask to xor with the page table entry to get the correct pfn. */
+static inline u64 protnone_mask(u64 val)
+{
+	return __pte_needs_invert(val) ?  ~0ull : 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+	/*
+	 * When a PTE transitions from NONE to !NONE or vice-versa
+	 * invert the PFN part to stop speculation.
+	 * pte_pfn undoes this when needed.
+	 */
+	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
+		val = (val & ~mask) | (~val & mask);
+	return val;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 84c62d950023..2ed1556d99b1 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -148,19 +148,29 @@ static inline int pte_special(pte_t pte)
 	return pte_flags(pte) & _PAGE_SPECIAL;
 }
 
+/* Entries that were set to PROT_NONE are inverted */
+
+static inline u64 protnone_mask(u64 val);
+
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+	unsigned long pfn = pte_val(pte);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+	unsigned long pfn = pmd_val(pmd);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+	unsigned long pfn = pud_val(pud);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
@@ -359,19 +369,25 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PTE_PFN_MASK;
+	return __pte(pfn | massage_pgprot(pgprot));
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PHYSICAL_PMD_PAGE_MASK;
+	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pteval_t val = pte_val(pte);
+	pteval_t val = pte_val(pte), oldval = val;
 
 	/*
 	 * Chop off the NX bit (if present), and add the NX portion of
@@ -379,17 +395,17 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	 */
 	val &= _PAGE_CHG_MASK;
 	val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
-
+	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
 	return __pte(val);
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	pmdval_t val = pmd_val(pmd);
+	pmdval_t val = pmd_val(pmd), oldval = val;
 
 	val &= _HPAGE_CHG_MASK;
 	val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
-
+	val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
 	return __pmd(val);
 }
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 67f2fe43a593..221a32ed1372 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -235,6 +235,8 @@ extern void cleanup_highmap(void);
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
+#include <asm/pgtable-invert.h>
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_64_H */

From 52dc5c9f8eee1c569974308f0bb7be64ec63565c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:25 -0700
Subject: [PATCH 296/519] x86/speculation/l1tf: Make sure the first page is
 always reserved

commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream

The L1TF workaround doesn't make any attempt to mitigate speculate accesses
to the first physical page for zeroed PTEs. Normally it only contains some
data from the early real mode BIOS.

It's not entirely clear that the first page is reserved in all
configurations, so add an extra reservation call to make sure it is really
reserved. In most configurations (e.g.  with the standard reservations)
it's likely a nop.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/setup.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bbaae4cf9e8e..31c4bc0d3372 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -851,6 +851,12 @@ void __init setup_arch(char **cmdline_p)
 	memblock_reserve(__pa_symbol(_text),
 			 (unsigned long)__bss_stop - (unsigned long)_text);
 
+	/*
+	 * Make sure page 0 is always reserved because on systems with
+	 * L1TF its contents can be leaked to user processes.
+	 */
+	memblock_reserve(0, PAGE_SIZE);
+
 	early_reserve_initrd();
 
 	/*

From bf0cca01b8736a5e146a980434ba36eb036e37ac Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:26 -0700
Subject: [PATCH 297/519] x86/speculation/l1tf: Add sysfs reporting for l1tf

commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream

L1TF core kernel workarounds are cheap and normally always enabled, However
they still should be reported in sysfs if the system is vulnerable or
mitigated. Add the necessary CPU feature/bug bits.

- Extend the existing checks for Meltdowns to determine if the system is
  vulnerable. All CPUs which are not vulnerable to Meltdown are also not
  vulnerable to L1TF

- Check for 32bit non PAE and emit a warning as there is no practical way
  for mitigation due to the limited physical address bits

- If the system has more than MAX_PA/2 physical memory the invert page
  workarounds don't protect the system against the L1TF attack anymore,
  because an inverted physical address will also point to valid
  memory. Print a warning in this case and report that the system is
  vulnerable.

Add a function which returns the PFN limit for the L1TF mitigation, which
will be used in follow up patches for sanity and range checks.

[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]
[ dwmw2: Backport to 4.9 (cpufeatures.h, E820) ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  3 ++-
 arch/x86/include/asm/processor.h   |  5 ++++
 arch/x86/kernel/cpu/bugs.c         | 40 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c       | 20 +++++++++++++++
 drivers/base/cpu.c                 |  8 ++++++
 include/linux/cpu.h                |  2 ++
 6 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f4b175db70f4..f3a8479c0d87 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -214,7 +214,7 @@
 #define X86_FEATURE_IBPB	( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP	( 7*32+27) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN		( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
-
+#define X86_FEATURE_L1TF_PTEINV	( 7*32+29) /* "" L1TF workaround PTE inversion */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -331,5 +331,6 @@
 #define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF		X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 8e415cf65457..a3a53955f01c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -172,6 +172,11 @@ extern const struct seq_operations cpuinfo_op;
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+static inline unsigned long l1tf_pfn_limit(void)
+{
+	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+}
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7688ce0b26c5..ce8d0abf3d35 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -26,9 +26,11 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/intel-family.h>
+#include <asm/e820.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
+static void __init l1tf_select_mitigation(void);
 
 /*
  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
@@ -80,6 +82,8 @@ void __init check_bugs(void)
 	 */
 	ssb_select_mitigation();
 
+	l1tf_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -203,6 +207,32 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
+static void __init l1tf_select_mitigation(void)
+{
+	u64 half_pa;
+
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+#if CONFIG_PGTABLE_LEVELS == 2
+	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+	return;
+#endif
+
+	/*
+	 * This is extremely unlikely to happen because almost all
+	 * systems have far more MAX_PA/2 than RAM can be fit into
+	 * DIMM slots.
+	 */
+	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		return;
+	}
+
+	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+}
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -655,6 +685,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
 
+	case X86_BUG_L1TF:
+		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
+			return sprintf(buf, "Mitigation: Page Table Inversion\n");
+		break;
+
 	default:
 		break;
 	}
@@ -681,4 +716,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
 }
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3d21b28f9826..4d3fa79c0f09 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -880,6 +880,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
 	{}
 };
 
+static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
+	/* in addition to cpu_no_speculation */
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MOOREFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_DENVERTON	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GEMINI_LAKE	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
+	{}
+};
+
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
@@ -905,6 +920,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		return;
 
 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	if (x86_match_cpu(cpu_no_l1tf))
+		return;
+
+	setup_force_cpu_bug(X86_BUG_L1TF);
 }
 
 /*
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 143edea1076f..41090ef5facb 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -524,16 +524,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
 	&dev_attr_spectre_v1.attr,
 	&dev_attr_spectre_v2.attr,
 	&dev_attr_spec_store_bypass.attr,
+	&dev_attr_l1tf.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 2f9d12022100..063c73ed6d78 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -48,6 +48,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev,
 				   struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
 					  struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,

From 0371d9c4c822fceb290a0b4cd21119534f7bae47 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 29 Dec 2015 20:12:20 -0800
Subject: [PATCH 298/519] mm: Add vm_insert_pfn_prot()

commit 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 upstream

The x86 vvar vma contains pages with differing cacheability
flags.  x86 currently implements this by manually inserting all
the ptes using (io_)remap_pfn_range when the vma is set up.

x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up
the mappings as needed.  The correct API to use to insert a pfn
in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the
vma's cache mode, and the HPET page in particular needs to be
uncached despite the fact that the rest of the VMA is cached.

Add vm_insert_pfn_prot() to support varying cacheability within
the same non-COW VMA in a more sane manner.

x86 could alternatively use multiple VMAs, but that's messy,
would break CRIU, and would create unnecessary VMAs that would
waste memory.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 25 +++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a100946607a5..1f4366567e7d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2083,6 +2083,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
diff --git a/mm/memory.c b/mm/memory.c
index 177cb7d111a9..edb5b0d8a4a0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1604,9 +1604,30 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
  */
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
+{
+	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
+/**
+ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vm_insert_pfn, except that it allows drivers to
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * cow mappings.  In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ */
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot)
 {
 	int ret;
-	pgprot_t pgprot = vma->vm_page_prot;
 	/*
 	 * Technically, architectures with pte_special can avoid all these
 	 * restrictions (same for remap_pfn_range).  However we would like
@@ -1628,7 +1649,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	return ret;
 }
-EXPORT_SYMBOL(vm_insert_pfn);
+EXPORT_SYMBOL(vm_insert_pfn_prot);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)

From 9ac0dc7d949db7afd4116d55fa4fcf6a66d820f0 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Oct 2016 17:00:18 -0700
Subject: [PATCH 299/519] mm: fix cache mode tracking in vm_insert_mixed()

commit 87744ab3832b83ba71b931f86f9cfdb000d07da5 upstream

vm_insert_mixed() unlike vm_insert_pfn_prot() and vmf_insert_pfn_pmd(),
fails to check the pgprot_t it uses for the mapping against the one
recorded in the memtype tracking tree.  Add the missing call to
track_pfn_insert() to preclude cases where incompatible aliased mappings
are established for a given physical address range.

[groeck: Backport to v4.4.y]

Link: http://lkml.kernel.org/r/147328717909.35069.14256589123570653697.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memory.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index edb5b0d8a4a0..78efb8c7ee20 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1654,10 +1654,14 @@ EXPORT_SYMBOL(vm_insert_pfn_prot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
 {
+	pgprot_t pgprot = vma->vm_page_prot;
+
 	BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
+	if (track_pfn_insert(vma, &pgprot, pfn))
+		return -EINVAL;
 
 	/*
 	 * If we don't have pte special, then we have to use the pfn_valid()
@@ -1670,9 +1674,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 		struct page *page;
 
 		page = pfn_to_page(pfn);
-		return insert_page(vma, addr, page, vma->vm_page_prot);
+		return insert_page(vma, addr, page, pgprot);
 	}
-	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+	return insert_pfn(vma, addr, pfn, pgprot);
 }
 EXPORT_SYMBOL(vm_insert_mixed);
 

From d71af2dbacb5611c1dcdc16fd1d343821d61bd5e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:27 -0700
Subject: [PATCH 300/519] x86/speculation/l1tf: Disallow non privileged high
 MMIO PROT_NONE mappings

commit 42e4089c7890725fcd329999252dc489b72f2921 upstream

For L1TF PROT_NONE mappings are protected by inverting the PFN in the page
table entry. This sets the high bits in the CPU's address space, thus
making sure to point to not point an unmapped entry to valid cached memory.

Some server system BIOSes put the MMIO mappings high up in the physical
address space. If such an high mapping was mapped to unprivileged users
they could attack low memory by setting such a mapping to PROT_NONE. This
could happen through a special device driver which is not access
protected. Normal /dev/mem is of course access protected.

To avoid this forbid PROT_NONE mappings or mprotect for high MMIO mappings.

Valid page mappings are allowed because the system is then unsafe anyways.

It's not expected that users commonly use PROT_NONE on MMIO. But to
minimize any impact this is only enforced if the mapping actually refers to
a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip
the check for root.

For mmaps this is straight forward and can be handled in vm_insert_pfn and
in remap_pfn_range().

For mprotect it's a bit trickier. At the point where the actual PTEs are
accessed a lot of state has been changed and it would be difficult to undo
on an error. Since this is a uncommon case use a separate early page talk
walk pass for MMIO PROT_NONE mappings that checks for this condition
early. For non MMIO and non PROT_NONE there are no changes.

[dwmw2: Backport to 4.9]
[groeck: Backport to 4.4]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h |  8 ++++++
 arch/x86/mm/mmap.c             | 21 +++++++++++++++
 include/asm-generic/pgtable.h  | 12 +++++++++
 mm/memory.c                    | 29 +++++++++++++++-----
 mm/mprotect.c                  | 49 ++++++++++++++++++++++++++++++++++
 5 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2ed1556d99b1..70e2248353cb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -942,6 +942,14 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 }
 #endif
 
+#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
+extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return boot_cpu_has_bug(X86_BUG_L1TF);
+}
+
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 307f60ecfc6d..9a055ea279eb 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -121,3 +121,24 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
+
+/*
+ * Only allow root to set high MMIO mappings to PROT_NONE.
+ * This prevents an unpriv. user to set them to PROT_NONE and invert
+ * them, then pointing to valid memory for L1TF speculation.
+ *
+ * Note: for locked down kernels may want to disable the root override.
+ */
+bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return true;
+	if (!__pte_needs_invert(pgprot_val(prot)))
+		return true;
+	/* If it's real memory always allow */
+	if (pfn_valid(pfn))
+		return true;
+	if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+		return false;
+	return true;
+}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 25b793325b09..976f749099ef 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -805,4 +805,16 @@ static inline int pmd_free_pte_page(pmd_t *pmd)
 #define io_remap_pfn_range remap_pfn_range
 #endif
 
+#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	return true;
+}
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/mm/memory.c b/mm/memory.c
index 78efb8c7ee20..d5bb1465d30c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1645,6 +1645,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	if (track_pfn_insert(vma, &pgprot, pfn))
 		return -EINVAL;
 
+	if (!pfn_modify_allowed(pfn, pgprot))
+		return -EACCES;
+
 	ret = insert_pfn(vma, addr, pfn, pgprot);
 
 	return ret;
@@ -1663,6 +1666,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	if (track_pfn_insert(vma, &pgprot, pfn))
 		return -EINVAL;
 
+	if (!pfn_modify_allowed(pfn, pgprot))
+		return -EACCES;
+
 	/*
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
@@ -1691,6 +1697,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
 	pte_t *pte;
 	spinlock_t *ptl;
+	int err = 0;
 
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
@@ -1698,12 +1705,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
+		if (!pfn_modify_allowed(pfn, prot)) {
+			err = -EACCES;
+			break;
+		}
 		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
-	return 0;
+	return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1712,6 +1723,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pmd = pmd_alloc(mm, pud, addr);
@@ -1720,9 +1732,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 	do {
 		next = pmd_addr_end(addr, end);
-		if (remap_pte_range(mm, pmd, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pte_range(mm, pmd, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
@@ -1733,6 +1746,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 {
 	pud_t *pud;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pud = pud_alloc(mm, pgd, addr);
@@ -1740,9 +1754,10 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (remap_pmd_range(mm, pud, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pmd_range(mm, pud, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c0b4b2a49462..a277f3412a5d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -255,6 +255,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	return pages;
 }
 
+static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
+			       unsigned long next, struct mm_walk *walk)
+{
+	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+		0 : -EACCES;
+}
+
+static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+				   unsigned long addr, unsigned long next,
+				   struct mm_walk *walk)
+{
+	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+		0 : -EACCES;
+}
+
+static int prot_none_test(unsigned long addr, unsigned long next,
+			  struct mm_walk *walk)
+{
+	return 0;
+}
+
+static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
+			   unsigned long end, unsigned long newflags)
+{
+	pgprot_t new_pgprot = vm_get_page_prot(newflags);
+	struct mm_walk prot_none_walk = {
+		.pte_entry = prot_none_pte_entry,
+		.hugetlb_entry = prot_none_hugetlb_entry,
+		.test_walk = prot_none_test,
+		.mm = current->mm,
+		.private = &new_pgprot,
+	};
+
+	return walk_page_range(start, end, &prot_none_walk);
+}
+
 int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long start, unsigned long end, unsigned long newflags)
@@ -272,6 +308,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 		return 0;
 	}
 
+	/*
+	 * Do PROT_NONE PFN permission checks here when we can still
+	 * bail out without undoing a lot of state. This is a rather
+	 * uncommon case, so doesn't need to be very optimized.
+	 */
+	if (arch_has_pfn_modify_check() &&
+	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+	    (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
+		error = prot_none_walk(vma, start, end, newflags);
+		if (error)
+			return error;
+	}
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we

From 685b44483f077c949bd5016fdfe734b662b74aba Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:28 -0700
Subject: [PATCH 301/519] x86/speculation/l1tf: Limit swap file size to
 MAX_PA/2

commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream

For the L1TF workaround its necessary to limit the swap file size to below
MAX_PA/2, so that the higher bits of the swap offset inverted never point
to valid memory.

Add a mechanism for the architecture to override the swap file size check
in swapfile.c and add a x86 specific max swapfile check function that
enforces that limit.

The check is only enabled if the CPU is vulnerable to L1TF.

In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system
with 46bit PA it is 32TB. The limit is only per individual swap file, so
it's always possible to exceed these limits with multiple swap files or
partitions.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c       | 15 +++++++++++++
 include/linux/swapfile.h |  2 ++
 mm/swapfile.c            | 46 ++++++++++++++++++++++++++--------------
 3 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 151fd33e9043..afde6da2768f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -4,6 +4,8 @@
 #include <linux/swap.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>	/* for max_low_pfn */
+#include <linux/swapfile.h>
+#include <linux/swapops.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -767,3 +769,16 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 	__cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
 	__pte2cachemode_tbl[entry] = cache;
 }
+
+unsigned long max_swapfile_size(void)
+{
+	unsigned long pages;
+
+	pages = generic_max_swapfile_size();
+
+	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
+	}
+	return pages;
+}
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 388293a91e8c..e4594de79bc4 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -9,5 +9,7 @@ extern spinlock_t swap_lock;
 extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
+extern unsigned long generic_max_swapfile_size(void);
+extern unsigned long max_swapfile_size(void);
 
 #endif /* _LINUX_SWAPFILE_H */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 674bf177ce44..8e25ff2b693a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2206,6 +2206,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 	return 0;
 }
 
+
+/*
+ * Find out how many pages are allowed for a single swap device. There
+ * are two limiting factors:
+ * 1) the number of bits for the swap offset in the swp_entry_t type, and
+ * 2) the number of bits in the swap pte, as defined by the different
+ * architectures.
+ *
+ * In order to find the largest possible bit mask, a swap entry with
+ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
+ * decoded to a swp_entry_t again, and finally the swap offset is
+ * extracted.
+ *
+ * This will mask all the bits from the initial ~0UL mask that can't
+ * be encoded in either the swp_entry_t or the architecture definition
+ * of a swap pte.
+ */
+unsigned long generic_max_swapfile_size(void)
+{
+	return swp_offset(pte_to_swp_entry(
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+}
+
+/* Can be overridden by an architecture for additional checks. */
+__weak unsigned long max_swapfile_size(void)
+{
+	return generic_max_swapfile_size();
+}
+
 static unsigned long read_swap_header(struct swap_info_struct *p,
 					union swap_header *swap_header,
 					struct inode *inode)
@@ -2241,22 +2270,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	p->cluster_next = 1;
 	p->cluster_nr = 0;
 
-	/*
-	 * Find out how many pages are allowed for a single swap
-	 * device. There are two limiting factors: 1) the number
-	 * of bits for the swap offset in the swp_entry_t type, and
-	 * 2) the number of bits in the swap pte as defined by the
-	 * different architectures. In order to find the
-	 * largest possible bit mask, a swap entry with swap type 0
-	 * and swap offset ~0UL is created, encoded to a swap pte,
-	 * decoded to a swp_entry_t again, and finally the swap
-	 * offset is extracted. This will mask all the bits from
-	 * the initial ~0UL mask that can't be encoded in either
-	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.
-	 */
-	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+	maxpages = max_swapfile_size();
 	last_page = swap_header->info.last_page;
 	if (!last_page) {
 		pr_warn("Empty swap-file\n");

From fa86c208d22d8179ef3d295f6084fc87390c8366 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 16:42:57 -0400
Subject: [PATCH 302/519] x86/bugs: Move the l1tf function and define pr_fmt
 properly

commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream

The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt
which was defined as "Spectre V2 : ".

Move the function to be past SSBD and also define the pr_fmt.

Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ce8d0abf3d35..34e4aaaf03d2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -207,32 +207,6 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
-static void __init l1tf_select_mitigation(void)
-{
-	u64 half_pa;
-
-	if (!boot_cpu_has_bug(X86_BUG_L1TF))
-		return;
-
-#if CONFIG_PGTABLE_LEVELS == 2
-	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
-	return;
-#endif
-
-	/*
-	 * This is extremely unlikely to happen because almost all
-	 * systems have far more MAX_PA/2 than RAM can be fit into
-	 * DIMM slots.
-	 */
-	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
-	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
-		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
-		return;
-	}
-
-	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
-}
-
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -658,6 +632,35 @@ void x86_spec_ctrl_setup_ap(void)
 		x86_amd_ssb_disable();
 }
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"L1TF: " fmt
+static void __init l1tf_select_mitigation(void)
+{
+	u64 half_pa;
+
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+#if CONFIG_PGTABLE_LEVELS == 2
+	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+	return;
+#endif
+
+	/*
+	 * This is extremely unlikely to happen because almost all
+	 * systems have far more MAX_PA/2 than RAM can be fit into
+	 * DIMM slots.
+	 */
+	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		return;
+	}
+
+	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+}
+#undef pr_fmt
+
 #ifdef CONFIG_SYSFS
 
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,

From df7fd6ccb358bd4aa3abc8a6ff995b1f3da1b0fb Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 21 Jun 2018 12:36:29 +0200
Subject: [PATCH 303/519] x86/speculation/l1tf: Extend 64bit swap file size
 limit

commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream

The previous patch has limited swap file size so that large offsets cannot
clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation.

It assumed that offsets are encoded starting with bit 12, same as pfn. But
on x86_64, offsets are encoded starting with bit 9.

Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA
and 256TB with 46bit MAX_PA.

Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index afde6da2768f..8de904926d7f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -778,7 +778,15 @@ unsigned long max_swapfile_size(void)
 
 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
-		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
+		unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
+		/*
+		 * We encode swap offsets also with 3 bits below those for pfn
+		 * which makes the usable limit higher.
+		 */
+#ifdef CONFIG_X86_64
+		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+#endif
+		pages = min_t(unsigned long, l1tf_limit, pages);
 	}
 	return pages;
 }

From dc48c1a2f45b628d3128ad4bb31d1bcd342c059d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Jun 2018 16:42:58 -0400
Subject: [PATCH 304/519] x86/cpufeatures: Add detection of L1D cache flush
 support.

commit 11e34e64e4103955fc4568750914c75d65ea87ee upstream

336996-Speculative-Execution-Side-Channel-Mitigations.pdf defines a new MSR
(IA32_FLUSH_CMD) which is detected by CPUID.7.EDX[28]=1 bit being set.

This new MSR "gives software a way to invalidate structures with finer
granularity than other architectual methods like WBINVD."

A copy of this document is available at
  https://bugzilla.kernel.org/show_bug.cgi?id=199511

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f3a8479c0d87..123a7105db1b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -310,6 +310,7 @@
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
 #define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 

From b55b06bd3b3c977da2c938d1a73d38674cb88086 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 22 Jun 2018 17:39:33 +0200
Subject: [PATCH 305/519] x86/speculation/l1tf: Protect PAE swap entries
 against L1TF

commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream

The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the
offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for
offset. The lower word is zeroed, thus systems with less than 4GB memory are
safe. With 4GB to 128GB the swap type selects the memory locations vulnerable
to L1TF; with even more memory, also the swap offfset influences the address.
This might be a problem with 32bit PAE guests running on large 64bit hosts.

By continuing to keep the whole swap entry in either high or low 32bit word of
PTE we would limit the swap size too much. Thus this patch uses the whole PAE
PTE with the same layout as the 64bit version does. The macros just become a
bit tricky since they assume the arch-dependent swp_entry_t to be 32bit.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-3level.h | 35 +++++++++++++++++++++++++--
 arch/x86/mm/init.c                    |  2 +-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 0c89891c7b44..5c686382d84b 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -177,12 +177,43 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #endif
 
 /* Encode and de-code a swap entry */
+#define SWP_TYPE_BITS		5
+
+#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
+
+/* We always extract/encode the offset by shifting it all the way up, and then down again */
+#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
+
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
 #define __swp_type(x)			(((x).val) & 0x1f)
 #define __swp_offset(x)			((x).val >> 5)
 #define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << 5})
-#define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
-#define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
+
+/*
+ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
+ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
+ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
+ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
+ * __swp_entry_to_pte() through the following helper macro based on 64bit
+ * __swp_entry().
+ */
+#define __swp_pteval_entry(type, offset) ((pteval_t) { \
+	(~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+	| ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
+
+#define __swp_entry_to_pte(x)	((pte_t){ .pte = \
+		__swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
+/*
+ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
+ * swp_entry_t, but also has to convert it from 64bit to the 32bit
+ * intermediate representation, using the following macros based on 64bit
+ * __swp_type() and __swp_offset().
+ */
+#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
+#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
+
+#define __pte_to_swp_entry(pte)	(__swp_entry(__pteval_swp_type(pte), \
+					     __pteval_swp_offset(pte)))
 
 #include <asm/pgtable-invert.h>
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 8de904926d7f..3a8e9abe0667 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -783,7 +783,7 @@ unsigned long max_swapfile_size(void)
 		 * We encode swap offsets also with 3 bits below those for pfn
 		 * which makes the usable limit higher.
 		 */
-#ifdef CONFIG_X86_64
+#if CONFIG_PGTABLE_LEVELS > 2
 		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
 #endif
 		pages = min_t(unsigned long, l1tf_limit, pages);

From 09049f022a9b96b0d09d90023d4f0a097a61a767 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Wed, 27 Jun 2018 17:46:50 +0200
Subject: [PATCH 306/519] x86/speculation/l1tf: Fix up pte->pfn conversion for
 PAE

commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream

Jan has noticed that pte_pfn and co. resp. pfn_pte are incorrect for
CONFIG_PAE because phys_addr_t is wider than unsigned long and so the
pte_val reps. shift left would get truncated. Fix this up by using proper
types.

[dwmw2: Backport to 4.9]

Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation")
Reported-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 70e2248353cb..16c6886a1ece 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -154,21 +154,21 @@ static inline u64 protnone_mask(u64 val);
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	unsigned long pfn = pte_val(pte);
+	phys_addr_t pfn = pte_val(pte);
 	pfn ^= protnone_mask(pfn);
 	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	unsigned long pfn = pmd_val(pmd);
+	phys_addr_t pfn = pmd_val(pmd);
 	pfn ^= protnone_mask(pfn);
 	return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	unsigned long pfn = pud_val(pud);
+	phys_addr_t pfn = pud_val(pud);
 	pfn ^= protnone_mask(pfn);
 	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
@@ -369,7 +369,7 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 	pfn ^= protnone_mask(pgprot_val(pgprot));
 	pfn &= PTE_PFN_MASK;
 	return __pte(pfn | massage_pgprot(pgprot));
@@ -377,7 +377,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 	pfn ^= protnone_mask(pgprot_val(pgprot));
 	pfn &= PHYSICAL_PMD_PAGE_MASK;
 	return __pmd(pfn | massage_pgprot(pgprot));

From 0aae5fe8413dfcd949d0df1c7d6b835efecd5b3b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:36 -0700
Subject: [PATCH 307/519] x86/speculation/l1tf: Invert all not present mappings

commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream

For kernel mappings PAGE_PROTNONE is not necessarily set for a non present
mapping, but the inversion logic explicitely checks for !PRESENT and
PROT_NONE.

Remove the PROT_NONE check and make the inversion unconditional for all not
present mappings.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-invert.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index 177564187fc0..44b1203ece12 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -6,7 +6,7 @@
 
 static inline bool __pte_needs_invert(u64 val)
 {
-	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
+	return !(val & _PAGE_PRESENT);
 }
 
 /* Get a mask to xor with the page table entry to get the correct pfn. */

From 9feecdb6cb73feaa55b0135aee8777eaac848c78 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:37 -0700
Subject: [PATCH 308/519] x86/speculation/l1tf: Make pmd/pud_mknotpresent()
 invert

commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream

Some cases in THP like:
  - MADV_FREE
  - mprotect
  - split

mark the PMD non present for temporarily to prevent races. The window for
an L1TF attack in these contexts is very small, but it wants to be fixed
for correctness sake.

Use the proper low level functions for pmd/pud_mknotpresent() to address
this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 16c6886a1ece..b5e157c065ae 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -315,11 +315,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_RW);
 }
 
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
-}
-
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline int pte_soft_dirty(pte_t pte)
 {
@@ -383,6 +378,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return pfn_pmd(pmd_pfn(pmd),
+		       __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
 static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)

From 02ff2769edbce2261e981effbc3c4b98fae4faf0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:39 -0700
Subject: [PATCH 309/519] x86/mm/pat: Make set_memory_np() L1TF safe

commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream

set_memory_np() is used to mark kernel mappings not present, but it has
it's own open coded mechanism which does not have the L1TF protection of
inverting the address bits.

Replace the open coded PTE manipulation with the L1TF protecting low level
PTE routines.

Passes the CPA self test.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[ dwmw2: Pull in pud_mkhuge() from commit a00cc7d9dd, and pfn_pud() ]
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
[groeck: port to 4.4]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 27 +++++++++++++++++++++++++++
 arch/x86/mm/pageattr.c         |  8 ++++----
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b5e157c065ae..4de6c282c02a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -378,12 +378,39 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
+static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+{
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PHYSICAL_PUD_PAGE_MASK;
+	return __pud(pfn | massage_pgprot(pgprot));
+}
+
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
 	return pfn_pmd(pmd_pfn(pmd),
 		       __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
 }
 
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+	pudval_t v = native_pud_val(pud);
+
+	return __pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+	pudval_t v = native_pud_val(pud);
+
+	return __pud(v & ~clear);
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_PSE);
+}
+
 static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 79377e2a7bcd..27610c2d1821 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1006,8 +1006,8 @@ static int populate_pmd(struct cpa_data *cpa,
 
 		pmd = pmd_offset(pud, start);
 
-		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE |
-				   massage_pgprot(pmd_pgprot)));
+		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
+					canon_pgprot(pmd_pgprot))));
 
 		start	  += PMD_SIZE;
 		cpa->pfn  += PMD_SIZE;
@@ -1079,8 +1079,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
 	 * Map everything starting from the Gb boundary, possibly with 1G pages
 	 */
 	while (end - start >= PUD_SIZE) {
-		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE |
-				   massage_pgprot(pud_pgprot)));
+		set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
+				   canon_pgprot(pud_pgprot))));
 
 		start	  += PUD_SIZE;
 		cpa->pfn  += PUD_SIZE;

From 6b06f36f07e2c91ad0126f17d0fc8f933c827da8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Aug 2018 15:09:38 -0700
Subject: [PATCH 310/519] x86/mm/kmmio: Make the tracer robust against L1TF

commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream

The mmio tracer sets io mapping PTEs and PMDs to non present when enabled
without inverting the address bits, which makes the PTE entry vulnerable
for L1TF.

Make it use the right low level macros to actually invert the address bits
to protect against L1TF.

In principle this could be avoided because MMIO tracing is not likely to be
enabled on production machines, but the fix is straigt forward and for
consistency sake it's better to get rid of the open coded PTE manipulation.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/kmmio.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 76604c8a2a48..7bf14e74fc8f 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -125,24 +125,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 
 static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
+	pmd_t new_pmd;
 	pmdval_t v = pmd_val(*pmd);
 	if (clear) {
-		*old = v & _PAGE_PRESENT;
-		v &= ~_PAGE_PRESENT;
-	} else	/* presume this has been called with clear==true previously */
-		v |= *old;
-	set_pmd(pmd, __pmd(v));
+		*old = v;
+		new_pmd = pmd_mknotpresent(*pmd);
+	} else {
+		/* Presume this has been called with clear==true previously */
+		new_pmd = __pmd(*old);
+	}
+	set_pmd(pmd, new_pmd);
 }
 
 static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
 	pteval_t v = pte_val(*pte);
 	if (clear) {
-		*old = v & _PAGE_PRESENT;
-		v &= ~_PAGE_PRESENT;
-	} else	/* presume this has been called with clear==true previously */
-		v |= *old;
-	set_pte_atomic(pte, __pte(v));
+		*old = v;
+		/* Nothing should care about address */
+		pte_clear(&init_mm, 0, pte);
+	} else {
+		/* Presume this has been called with clear==true previously */
+		set_pte_atomic(pte, __pte(*old));
+	}
 }
 
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)

From eb993211b9d7856a9ab8c487c701c84103842713 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 13 Aug 2018 10:15:16 -0700
Subject: [PATCH 311/519] x86/speculation/l1tf: Fix up CPU feature flags

In linux-4.4.y, the definition of X86_FEATURE_RETPOLINE and
X86_FEATURE_RETPOLINE_AMD is different from the upstream
definition. Result is an overlap with the newly introduced
X86_FEATURE_L1TF_PTEINV. Update RETPOLINE definitions to match
upstream definitions to improve alignment with upstream code.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 123a7105db1b..dd2269dcbc47 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -193,12 +193,12 @@
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
+#define X86_FEATURE_RETPOLINE	( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* "" Fill RSB on context switches */
 
-#define X86_FEATURE_RETPOLINE	( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
-
 #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
 #define X86_FEATURE_SSBD	( 7*32+17) /* Speculative Store Bypass Disable */
 

From 4b90ff885c6cc88795b678414aaf5d7b0153a5dc Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 14 Aug 2018 20:50:47 +0200
Subject: [PATCH 312/519] x86/init: fix build with CONFIG_SWAP=n

commit 792adb90fa724ce07c0171cbc96b9215af4b1045 upstream.

The introduction of generic_max_swapfile_size and arch-specific versions has
broken linking on x86 with CONFIG_SWAP=n due to undefined reference to
'generic_max_swapfile_size'. Fix it by compiling the x86-specific
max_swapfile_size() only with CONFIG_SWAP=y.

Reported-by: Tomas Pruzina <pruzinat@gmail.com>
Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3a8e9abe0667..4954a6cef50a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -770,6 +770,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 	__pte2cachemode_tbl[entry] = cache;
 }
 
+#ifdef CONFIG_SWAP
 unsigned long max_swapfile_size(void)
 {
 	unsigned long pages;
@@ -790,3 +791,4 @@ unsigned long max_swapfile_size(void)
 	}
 	return pages;
 }
+#endif

From 8f2adf3d2118cc0822b83a7bb43475f9149a1d26 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sat, 14 Jul 2018 21:56:13 +0200
Subject: [PATCH 313/519] x86/speculation/l1tf: Unbreak
 !__HAVE_ARCH_PFN_MODIFY_ALLOWED architectures

commit 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 upstream.

pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the
!__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses
assembler on archs that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED (e.g.
ia64) and breaks build:

    include/asm-generic/pgtable.h: Assembler messages:
    include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)'
    include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true'
    include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)'
    include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false'
    arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle

Move those two static inlines into the !__ASSEMBLY__ section so that they
don't confuse the asm build pass.

Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[groeck: Context changes]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/asm-generic/pgtable.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 976f749099ef..dabecb661264 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -799,12 +799,6 @@ static inline int pmd_free_pte_page(pmd_t *pmd)
 }
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef io_remap_pfn_range
-#define io_remap_pfn_range remap_pfn_range
-#endif
-
 #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
 static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
 {
@@ -815,6 +809,12 @@ static inline bool arch_has_pfn_modify_check(void)
 {
 	return false;
 }
+#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
+
+#endif /* !__ASSEMBLY__ */
+
+#ifndef io_remap_pfn_range
+#define io_remap_pfn_range remap_pfn_range
 #endif
 
 #endif /* _ASM_GENERIC_PGTABLE_H */

From 30a97c1e2dc39f45d9deeeccc2733278fc285d5e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 15 Aug 2018 17:42:11 +0200
Subject: [PATCH 314/519] Linux 4.4.148

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ee92a12e3a4b..9b795164122e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 147
+SUBLEVEL = 148
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 438604aa025a449e5c994ba4c824cc7267b8ccd2 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Wed, 27 Jun 2018 08:13:46 -0600
Subject: [PATCH 315/519] x86/mm: Disable ioremap free page handling on x86-PAE

commit f967db0b9ed44ec3057a28f3b28efc51df51b835 upstream.

ioremap() supports pmd mappings on x86-PAE.  However, kernel's pmd
tables are not shared among processes on x86-PAE.  Therefore, any
update to sync'd pmd entries need re-syncing.  Freeing a pte page
also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one().

Disable free page handling on x86-PAE.  pud_free_pmd_page() and
pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present.
This assures that ioremap() does not update sync'd pmd entries at the
cost of falling back to pte mappings.

Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Reported-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: cpandya@codeaurora.org
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-2-toshi.kani@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pgtable.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 08e94b6139ab..8e0378a00d50 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -676,6 +676,7 @@ int pmd_clear_huge(pmd_t *pmd)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
@@ -723,4 +724,22 @@ int pmd_free_pte_page(pmd_t *pmd)
 
 	return 1;
 }
+
+#else /* !CONFIG_X86_64 */
+
+int pud_free_pmd_page(pud_t *pud)
+{
+	return pud_none(*pud);
+}
+
+/*
+ * Disable free page handling on x86-PAE. This assures that ioremap()
+ * does not update sync'd pmd entries. See vmalloc_sync_one().
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+	return pmd_none(*pmd);
+}
+
+#endif /* CONFIG_X86_64 */
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

From 42962538cd9fe281a6e8602f22c7b1e218ed812a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 15 Aug 2018 11:58:46 +0200
Subject: [PATCH 316/519] tcp: Fix missing range_truesize enlargement in the
 backport

The 4.4.y stable backport dc6ae4dffd65 for the upstream commit
3d4bf93ac120 ("tcp: detect malicious patterns in
tcp_collapse_ofo_queue()") missed a line that enlarges the
range_truesize value, which broke the whole check.

Fixes: dc6ae4dffd65 ("tcp: detect malicious patterns in tcp_collapse_ofo_queue()")
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Cc: Michal Kubecek <mkubecek@suse.cz>
---
 net/ipv4/tcp_input.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4a261e078082..9c4c6cd0316e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4835,6 +4835,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 			end = TCP_SKB_CB(skb)->end_seq;
 			range_truesize = skb->truesize;
 		} else {
+			range_truesize += skb->truesize;
 			if (before(TCP_SKB_CB(skb)->seq, start))
 				start = TCP_SKB_CB(skb)->seq;
 			if (after(TCP_SKB_CB(skb)->end_seq, end))

From dcb852a7db98fb702878e811425063cd00e688b2 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Tue, 6 Feb 2018 15:36:00 -0800
Subject: [PATCH 317/519] kasan: don't emit builtin calls when sanitization is
 off

commit 0e410e158e5baa1300bdf678cea4f4e0cf9d8b94 upstream.

With KASAN enabled the kernel has two different memset() functions, one
with KASAN checks (memset) and one without (__memset).  KASAN uses some
macro tricks to use the proper version where required.  For example
memset() calls in mm/slub.c are without KASAN checks, since they operate
on poisoned slab object metadata.

The issue is that clang emits memset() calls even when there is no
memset() in the source code.  They get linked with improper memset()
implementation and the kernel fails to boot due to a huge amount of KASAN
reports during early boot stages.

The solution is to add -fno-builtin flag for files with KASAN_SANITIZE :=
n marker.

Link: http://lkml.kernel.org/r/8ffecfffe04088c52c42b92739c2bd8a0bcb3f5e.1516384594.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ Nick : Backported to 4.4 avoiding KUBSAN ]
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile               | 3 ++-
 scripts/Makefile.kasan | 3 +++
 scripts/Makefile.lib   | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 9b795164122e..cacb45c3173a 100644
--- a/Makefile
+++ b/Makefile
@@ -418,7 +418,8 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV
+export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan
index 37323b0df374..2624d4bf9a45 100644
--- a/scripts/Makefile.kasan
+++ b/scripts/Makefile.kasan
@@ -28,4 +28,7 @@ else
         CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL)
     endif
 endif
+
+CFLAGS_KASAN_NOSANITIZE := -fno-builtin
+
 endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 24914e7de944..a2d0e6d32659 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -126,7 +126,7 @@ endif
 ifeq ($(CONFIG_KASAN),y)
 _c_flags += $(if $(patsubst n%,, \
 		$(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \
-		$(CFLAGS_KASAN))
+		$(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE))
 endif
 
 # If building the kernel in a separate objtree expand all occurrences

From 3abc229cfc033612acf27a9efbe9a44ea9004592 Mon Sep 17 00:00:00 2001
From: Liwei Song <liwei.song@windriver.com>
Date: Tue, 13 Jun 2017 00:59:53 -0400
Subject: [PATCH 318/519] i2c: ismt: fix wrong device address when unmap the
 data buffer

commit 17e83549e199d89aace7788a9f11c108671eecf5 upstream.

Fix the following kernel bug:

kernel BUG at drivers/iommu/intel-iommu.c:3260!
invalid opcode: 0000 [#5] PREEMPT SMP
Hardware name: Intel Corp. Harcuvar/Server, BIOS HAVLCRB0.X64.0013.D39.1608311820 08/31/2016
task: ffff880175389950 ti: ffff880176bec000 task.ti: ffff880176bec000
RIP: 0010:[<ffffffff8150a83b>]  [<ffffffff8150a83b>] intel_unmap+0x25b/0x260
RSP: 0018:ffff880176bef5e8  EFLAGS: 00010296
RAX: 0000000000000024 RBX: ffff8800773c7c88 RCX: 000000000000ce04
RDX: 0000000080000000 RSI: 0000000000000000 RDI: 0000000000000009
RBP: ffff880176bef638 R08: 0000000000000010 R09: 0000000000000004
R10: ffff880175389c78 R11: 0000000000000a4f R12: ffff8800773c7868
R13: 00000000ffffac88 R14: ffff8800773c7818 R15: 0000000000000001
FS:  00007fef21258700(0000) GS:ffff88017b5c0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000066d6d8 CR3: 000000007118c000 CR4: 00000000003406e0
Stack:
 00000000ffffac88 ffffffff8199867f ffff880176bef5f8 ffff880100000030
 ffff880176bef668 ffff8800773c7c88 ffff880178288098 ffff8800772c0010
 ffff8800773c7818 0000000000000001 ffff880176bef648 ffffffff8150a86e
Call Trace:
 [<ffffffff8199867f>] ? printk+0x46/0x48
 [<ffffffff8150a86e>] intel_unmap_page+0xe/0x10
 [<ffffffffa039d99b>] ismt_access+0x27b/0x8fa [i2c_ismt]
 [<ffffffff81554420>] ? __pm_runtime_suspend+0xa0/0xa0
 [<ffffffff815544a0>] ? pm_suspend_timer_fn+0x80/0x80
 [<ffffffff81554420>] ? __pm_runtime_suspend+0xa0/0xa0
 [<ffffffff815544a0>] ? pm_suspend_timer_fn+0x80/0x80
 [<ffffffff8143dfd0>] ? pci_bus_read_dev_vendor_id+0xf0/0xf0
 [<ffffffff8172b36c>] i2c_smbus_xfer+0xec/0x4b0
 [<ffffffff810aa4d5>] ? vprintk_emit+0x345/0x530
 [<ffffffffa038936b>] i2cdev_ioctl_smbus+0x12b/0x240 [i2c_dev]
 [<ffffffff810aa829>] ? vprintk_default+0x29/0x40
 [<ffffffffa0389b33>] i2cdev_ioctl+0x63/0x1ec [i2c_dev]
 [<ffffffff811b04c8>] do_vfs_ioctl+0x328/0x5d0
 [<ffffffff8119d8ec>] ? vfs_write+0x11c/0x190
 [<ffffffff8109d449>] ? rt_up_read+0x19/0x20
 [<ffffffff811b07f1>] SyS_ioctl+0x81/0xa0
 [<ffffffff819a351b>] system_call_fastpath+0x16/0x6e

This happen When run "i2cdetect -y 0" detect SMBus iSMT adapter.

After finished I2C block read/write, when unmap the data buffer,
a wrong device address was pass to dma_unmap_single().

To fix this, give dma_unmap_single() the "dev" parameter, just like
what dma_map_single() does, then unmap can find the right devices.

Fixes: 13f35ac14cd0 ("i2c: Adding support for Intel iSMT SMBus 2.0 host controller")
Signed-off-by: Liwei Song <liwei.song@windriver.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-ismt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index 1111cb966a44..fa2b58142cde 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -587,7 +587,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 
 	/* unmap the data buffer */
 	if (dma_size != 0)
-		dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction);
+		dma_unmap_single(dev, dma_addr, dma_size, dma_direction);
 
 	if (unlikely(!time_left)) {
 		dev_err(dev, "completion wait timed out\n");

From 293cce718b133959f04b94148326334c9bfab5ad Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Jul 2018 19:46:06 -0700
Subject: [PATCH 319/519] kbuild: verify that $DEPMOD is installed

commit 934193a654c1f4d0643ddbf4b2529b508cae926e upstream.

Verify that 'depmod' ($DEPMOD) is installed.
This is a partial revert of commit 620c231c7a7f
("kbuild: do not check for ancient modutils tools").

Also update Documentation/process/changes.rst to refer to
kmod instead of module-init-tools.

Fixes kernel bugzilla #198965:
https://bugzilla.kernel.org/show_bug.cgi?id=198965

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Cc: Lucas De Marchi <lucas.de.marchi@gmail.com>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Chih-Wei Huang <cwhuang@linux.org.tw>
Cc: stable@vger.kernel.org # any kernel since 2012
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/Changes | 17 ++++++-----------
 scripts/depmod.sh     |  8 +++++++-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/Documentation/Changes b/Documentation/Changes
index ec97b77c8b00..f25649ffb892 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -25,7 +25,7 @@ o  GNU C                  3.2                     # gcc --version
 o  GNU make               3.80                    # make --version
 o  binutils               2.12                    # ld -v
 o  util-linux             2.10o                   # fdformat --version
-o  module-init-tools      0.9.10                  # depmod -V
+o  kmod                   13                      # depmod -V
 o  e2fsprogs              1.41.4                  # e2fsck -V
 o  jfsutils               1.1.3                   # fsck.jfs -V
 o  reiserfsprogs          3.6.3                   # reiserfsck -V
@@ -132,12 +132,6 @@ is not build with CONFIG_KALLSYMS and you have no way to rebuild and
 reproduce the Oops with that option, then you can still decode that Oops
 with ksymoops.
 
-Module-Init-Tools
------------------
-
-A new module loader is now in the kernel that requires module-init-tools
-to use.  It is backward compatible with the 2.4.x series kernels.
-
 Mkinitrd
 --------
 
@@ -319,14 +313,15 @@ Util-linux
 ----------
 o  <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
 
+Kmod
+----
+o  <https://www.kernel.org/pub/linux/utils/kernel/kmod/>
+o  <https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git>
+
 Ksymoops
 --------
 o  <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
 
-Module-Init-Tools
------------------
-o  <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
-
 Mkinitrd
 --------
 o  <https://code.launchpad.net/initrd-tools/main>
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index 122599b1c13b..ea1e96921e3b 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -10,10 +10,16 @@ DEPMOD=$1
 KERNELRELEASE=$2
 SYMBOL_PREFIX=$3
 
-if ! test -r System.map -a -x "$DEPMOD"; then
+if ! test -r System.map ; then
 	exit 0
 fi
 
+if [ -z $(command -v $DEPMOD) ]; then
+	echo "'make modules_install' requires $DEPMOD. Please install it." >&2
+	echo "This is probably in the kmod package." >&2
+	exit 1
+fi
+
 # older versions of depmod don't support -P <symbol-prefix>
 # support was added in module-init-tools 3.13
 if test -n "$SYMBOL_PREFIX"; then

From 9054a54766f7cd3e33086c47701ad19eb1d0d404 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Jun 2018 10:22:37 -0700
Subject: [PATCH 320/519] crypto: vmac - require a block cipher with 128-bit
 block size

commit 73bf20ef3df262026c3470241ae4ac8196943ffa upstream.

The VMAC template assumes the block cipher has a 128-bit block size, but
it failed to check for that.  Thus it was possible to instantiate it
using a 64-bit block size cipher, e.g. "vmac(cast5)", causing
uninitialized memory to be used.

Add the needed check when instantiating the template.

Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support")
Cc: <stable@vger.kernel.org> # v2.6.32+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/vmac.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crypto/vmac.c b/crypto/vmac.c
index df76a816cfb2..3034454a3713 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -655,6 +655,10 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (IS_ERR(alg))
 		return PTR_ERR(alg);
 
+	err = -EINVAL;
+	if (alg->cra_blocksize != 16)
+		goto out_put_alg;
+
 	inst = shash_alloc_instance("vmac", alg);
 	err = PTR_ERR(inst);
 	if (IS_ERR(inst))

From 335e988310f9bf17b94001945f0c6985e54c88b4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Jun 2018 10:22:38 -0700
Subject: [PATCH 321/519] crypto: vmac - separate tfm and request context

commit bb29648102335586e9a66289a1d98a0cb392b6e5 upstream.

syzbot reported a crash in vmac_final() when multiple threads
concurrently use the same "vmac(aes)" transform through AF_ALG.  The bug
is pretty fundamental: the VMAC template doesn't separate per-request
state from per-tfm (per-key) state like the other hash algorithms do,
but rather stores it all in the tfm context.  That's wrong.

Also, vmac_final() incorrectly zeroes most of the state including the
derived keys and cached pseudorandom pad.  Therefore, only the first
VMAC invocation with a given key calculates the correct digest.

Fix these bugs by splitting the per-tfm state from the per-request state
and using the proper init/update/final sequencing for requests.

Reproducer for the crash:

    #include <linux/if_alg.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main()
    {
            int fd;
            struct sockaddr_alg addr = {
                    .salg_type = "hash",
                    .salg_name = "vmac(aes)",
            };
            char buf[256] = { 0 };

            fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
            bind(fd, (void *)&addr, sizeof(addr));
            setsockopt(fd, SOL_ALG, ALG_SET_KEY, buf, 16);
            fork();
            fd = accept(fd, NULL, NULL);
            for (;;)
                    write(fd, buf, 256);
    }

The immediate cause of the crash is that vmac_ctx_t.partial_size exceeds
VMAC_NHBYTES, causing vmac_final() to memset() a negative length.

Reported-by: syzbot+264bca3a6e8d645550d3@syzkaller.appspotmail.com
Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support")
Cc: <stable@vger.kernel.org> # v2.6.32+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/vmac.c         | 428 +++++++++++++++++++-----------------------
 include/crypto/vmac.h |  63 -------
 2 files changed, 191 insertions(+), 300 deletions(-)
 delete mode 100644 include/crypto/vmac.h

diff --git a/crypto/vmac.c b/crypto/vmac.c
index 3034454a3713..bb2fc787d615 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -1,6 +1,10 @@
 /*
- * Modified to interface to the Linux kernel
+ * VMAC: Message Authentication Code using Universal Hashing
+ *
+ * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01
+ *
  * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2018, Google Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -16,14 +20,15 @@
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
 
-/* --------------------------------------------------------------------------
- * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
- * This implementation is herby placed in the public domain.
- * The authors offers no warranty. Use at your own risk.
- * Please send bug reports to the authors.
- * Last modified: 17 APR 08, 1700 PDT
- * ----------------------------------------------------------------------- */
+/*
+ * Derived from:
+ *	VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
+ *	This implementation is herby placed in the public domain.
+ *	The authors offers no warranty. Use at your own risk.
+ *	Last modified: 17 APR 08, 1700 PDT
+ */
 
+#include <asm/unaligned.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
@@ -31,9 +36,35 @@
 #include <linux/scatterlist.h>
 #include <asm/byteorder.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/vmac.h>
 #include <crypto/internal/hash.h>
 
+/*
+ * User definable settings.
+ */
+#define VMAC_TAG_LEN	64
+#define VMAC_KEY_SIZE	128/* Must be 128, 192 or 256			*/
+#define VMAC_KEY_LEN	(VMAC_KEY_SIZE/8)
+#define VMAC_NHBYTES	128/* Must 2^i for any 3 < i < 13 Standard = 128*/
+
+/* per-transform (per-key) context */
+struct vmac_tfm_ctx {
+	struct crypto_cipher *cipher;
+	u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
+	u64 polykey[2*VMAC_TAG_LEN/64];
+	u64 l3key[2*VMAC_TAG_LEN/64];
+};
+
+/* per-request context */
+struct vmac_desc_ctx {
+	union {
+		u8 partial[VMAC_NHBYTES];	/* partial block */
+		__le64 partial_words[VMAC_NHBYTES / 8];
+	};
+	unsigned int partial_size;	/* size of the partial block */
+	bool first_block_processed;
+	u64 polytmp[2*VMAC_TAG_LEN/64];	/* running total of L2-hash */
+};
+
 /*
  * Constants and masks
  */
@@ -318,13 +349,6 @@ static void poly_step_func(u64 *ahi, u64 *alo,
 	} while (0)
 #endif
 
-static void vhash_abort(struct vmac_ctx *ctx)
-{
-	ctx->polytmp[0] = ctx->polykey[0] ;
-	ctx->polytmp[1] = ctx->polykey[1] ;
-	ctx->first_block_processed = 0;
-}
-
 static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 {
 	u64 rh, rl, t, z = 0;
@@ -364,280 +388,209 @@ static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 	return rl;
 }
 
-static void vhash_update(const unsigned char *m,
-			unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */
-			struct vmac_ctx *ctx)
+/* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */
+static void vhash_blocks(const struct vmac_tfm_ctx *tctx,
+			 struct vmac_desc_ctx *dctx,
+			 const __le64 *mptr, unsigned int blocks)
 {
-	u64 rh, rl, *mptr;
-	const u64 *kptr = (u64 *)ctx->nhkey;
-	int i;
-	u64 ch, cl;
-	u64 pkh = ctx->polykey[0];
-	u64 pkl = ctx->polykey[1];
+	const u64 *kptr = tctx->nhkey;
+	const u64 pkh = tctx->polykey[0];
+	const u64 pkl = tctx->polykey[1];
+	u64 ch = dctx->polytmp[0];
+	u64 cl = dctx->polytmp[1];
+	u64 rh, rl;
 
-	if (!mbytes)
-		return;
-
-	BUG_ON(mbytes % VMAC_NHBYTES);
-
-	mptr = (u64 *)m;
-	i = mbytes / VMAC_NHBYTES;  /* Must be non-zero */
-
-	ch = ctx->polytmp[0];
-	cl = ctx->polytmp[1];
-
-	if (!ctx->first_block_processed) {
-		ctx->first_block_processed = 1;
+	if (!dctx->first_block_processed) {
+		dctx->first_block_processed = true;
 		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
 		rh &= m62;
 		ADD128(ch, cl, rh, rl);
 		mptr += (VMAC_NHBYTES/sizeof(u64));
-		i--;
+		blocks--;
 	}
 
-	while (i--) {
+	while (blocks--) {
 		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
 		rh &= m62;
 		poly_step(ch, cl, pkh, pkl, rh, rl);
 		mptr += (VMAC_NHBYTES/sizeof(u64));
 	}
 
-	ctx->polytmp[0] = ch;
-	ctx->polytmp[1] = cl;
+	dctx->polytmp[0] = ch;
+	dctx->polytmp[1] = cl;
 }
 
-static u64 vhash(unsigned char m[], unsigned int mbytes,
-			u64 *tagl, struct vmac_ctx *ctx)
+static int vmac_setkey(struct crypto_shash *tfm,
+		       const u8 *key, unsigned int keylen)
 {
-	u64 rh, rl, *mptr;
-	const u64 *kptr = (u64 *)ctx->nhkey;
-	int i, remaining;
-	u64 ch, cl;
-	u64 pkh = ctx->polykey[0];
-	u64 pkl = ctx->polykey[1];
+	struct vmac_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+	__be64 out[2];
+	u8 in[16] = { 0 };
+	unsigned int i;
+	int err;
 
-	mptr = (u64 *)m;
-	i = mbytes / VMAC_NHBYTES;
-	remaining = mbytes % VMAC_NHBYTES;
-
-	if (ctx->first_block_processed) {
-		ch = ctx->polytmp[0];
-		cl = ctx->polytmp[1];
-	} else if (i) {
-		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl);
-		ch &= m62;
-		ADD128(ch, cl, pkh, pkl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-		i--;
-	} else if (remaining) {
-		nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl);
-		ch &= m62;
-		ADD128(ch, cl, pkh, pkl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-		goto do_l3;
-	} else {/* Empty String */
-		ch = pkh; cl = pkl;
-		goto do_l3;
+	if (keylen != VMAC_KEY_LEN) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
 	}
 
-	while (i--) {
-		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
-		rh &= m62;
-		poly_step(ch, cl, pkh, pkl, rh, rl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-	}
-	if (remaining) {
-		nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl);
-		rh &= m62;
-		poly_step(ch, cl, pkh, pkl, rh, rl);
-	}
-
-do_l3:
-	vhash_abort(ctx);
-	remaining *= 8;
-	return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining);
-}
-
-static u64 vmac(unsigned char m[], unsigned int mbytes,
-			const unsigned char n[16], u64 *tagl,
-			struct vmac_ctx_t *ctx)
-{
-	u64 *in_n, *out_p;
-	u64 p, h;
-	int i;
-
-	in_n = ctx->__vmac_ctx.cached_nonce;
-	out_p = ctx->__vmac_ctx.cached_aes;
-
-	i = n[15] & 1;
-	if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) {
-		in_n[0] = *(u64 *)(n);
-		in_n[1] = *(u64 *)(n+8);
-		((unsigned char *)in_n)[15] &= 0xFE;
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out_p, (unsigned char *)in_n);
-
-		((unsigned char *)in_n)[15] |= (unsigned char)(1-i);
-	}
-	p = be64_to_cpup(out_p + i);
-	h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx);
-	return le64_to_cpu(p + h);
-}
-
-static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx)
-{
-	u64 in[2] = {0}, out[2];
-	unsigned i;
-	int err = 0;
-
-	err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN);
+	err = crypto_cipher_setkey(tctx->cipher, key, keylen);
 	if (err)
 		return err;
 
 	/* Fill nh key */
-	((unsigned char *)in)[0] = 0x80;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) {
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out, (unsigned char *)in);
-		ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out);
-		ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1);
-		((unsigned char *)in)[15] += 1;
+	in[0] = 0x80;
+	for (i = 0; i < ARRAY_SIZE(tctx->nhkey); i += 2) {
+		crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+		tctx->nhkey[i] = be64_to_cpu(out[0]);
+		tctx->nhkey[i+1] = be64_to_cpu(out[1]);
+		in[15]++;
 	}
 
 	/* Fill poly key */
-	((unsigned char *)in)[0] = 0xC0;
-	in[1] = 0;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) {
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out, (unsigned char *)in);
-		ctx->__vmac_ctx.polytmp[i] =
-			ctx->__vmac_ctx.polykey[i] =
-				be64_to_cpup(out) & mpoly;
-		ctx->__vmac_ctx.polytmp[i+1] =
-			ctx->__vmac_ctx.polykey[i+1] =
-				be64_to_cpup(out+1) & mpoly;
-		((unsigned char *)in)[15] += 1;
+	in[0] = 0xC0;
+	in[15] = 0;
+	for (i = 0; i < ARRAY_SIZE(tctx->polykey); i += 2) {
+		crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+		tctx->polykey[i] = be64_to_cpu(out[0]) & mpoly;
+		tctx->polykey[i+1] = be64_to_cpu(out[1]) & mpoly;
+		in[15]++;
 	}
 
 	/* Fill ip key */
-	((unsigned char *)in)[0] = 0xE0;
-	in[1] = 0;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) {
+	in[0] = 0xE0;
+	in[15] = 0;
+	for (i = 0; i < ARRAY_SIZE(tctx->l3key); i += 2) {
 		do {
-			crypto_cipher_encrypt_one(ctx->child,
-				(unsigned char *)out, (unsigned char *)in);
-			ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out);
-			ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1);
-			((unsigned char *)in)[15] += 1;
-		} while (ctx->__vmac_ctx.l3key[i] >= p64
-			|| ctx->__vmac_ctx.l3key[i+1] >= p64);
+			crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+			tctx->l3key[i] = be64_to_cpu(out[0]);
+			tctx->l3key[i+1] = be64_to_cpu(out[1]);
+			in[15]++;
+		} while (tctx->l3key[i] >= p64 || tctx->l3key[i+1] >= p64);
 	}
 
-	/* Invalidate nonce/aes cache and reset other elements */
-	ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */
-	ctx->__vmac_ctx.cached_nonce[1] = (u64)0;  /* Ensure illegal nonce */
-	ctx->__vmac_ctx.first_block_processed = 0;
-
-	return err;
-}
-
-static int vmac_setkey(struct crypto_shash *parent,
-		const u8 *key, unsigned int keylen)
-{
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-
-	if (keylen != VMAC_KEY_LEN) {
-		crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return vmac_set_key((u8 *)key, ctx);
-}
-
-static int vmac_init(struct shash_desc *pdesc)
-{
-	return 0;
-}
-
-static int vmac_update(struct shash_desc *pdesc, const u8 *p,
-		unsigned int len)
-{
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-	int expand;
-	int min;
-
-	expand = VMAC_NHBYTES - ctx->partial_size > 0 ?
-			VMAC_NHBYTES - ctx->partial_size : 0;
-
-	min = len < expand ? len : expand;
-
-	memcpy(ctx->partial + ctx->partial_size, p, min);
-	ctx->partial_size += min;
-
-	if (len < expand)
-		return 0;
-
-	vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx);
-	ctx->partial_size = 0;
-
-	len -= expand;
-	p += expand;
-
-	if (len % VMAC_NHBYTES) {
-		memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES),
-			len % VMAC_NHBYTES);
-		ctx->partial_size = len % VMAC_NHBYTES;
-	}
-
-	vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx);
-
 	return 0;
 }
 
-static int vmac_final(struct shash_desc *pdesc, u8 *out)
+static int vmac_init(struct shash_desc *desc)
 {
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-	vmac_t mac;
-	u8 nonce[16] = {};
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	/* vmac() ends up accessing outside the array bounds that
-	 * we specify.  In appears to access up to the next 2-word
-	 * boundary.  We'll just be uber cautious and zero the
-	 * unwritten bytes in the buffer.
-	 */
-	if (ctx->partial_size) {
-		memset(ctx->partial + ctx->partial_size, 0,
-			VMAC_NHBYTES - ctx->partial_size);
+	dctx->partial_size = 0;
+	dctx->first_block_processed = false;
+	memcpy(dctx->polytmp, tctx->polykey, sizeof(dctx->polytmp));
+	return 0;
+}
+
+static int vmac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
+{
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int n;
+
+	if (dctx->partial_size) {
+		n = min(len, VMAC_NHBYTES - dctx->partial_size);
+		memcpy(&dctx->partial[dctx->partial_size], p, n);
+		dctx->partial_size += n;
+		p += n;
+		len -= n;
+		if (dctx->partial_size == VMAC_NHBYTES) {
+			vhash_blocks(tctx, dctx, dctx->partial_words, 1);
+			dctx->partial_size = 0;
+		}
 	}
-	mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx);
-	memcpy(out, &mac, sizeof(vmac_t));
-	memzero_explicit(&mac, sizeof(vmac_t));
-	memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
-	ctx->partial_size = 0;
+
+	if (len >= VMAC_NHBYTES) {
+		n = round_down(len, VMAC_NHBYTES);
+		/* TODO: 'p' may be misaligned here */
+		vhash_blocks(tctx, dctx, (const __le64 *)p, n / VMAC_NHBYTES);
+		p += n;
+		len -= n;
+	}
+
+	if (len) {
+		memcpy(dctx->partial, p, len);
+		dctx->partial_size = len;
+	}
+
+	return 0;
+}
+
+static u64 vhash_final(const struct vmac_tfm_ctx *tctx,
+		       struct vmac_desc_ctx *dctx)
+{
+	unsigned int partial = dctx->partial_size;
+	u64 ch = dctx->polytmp[0];
+	u64 cl = dctx->polytmp[1];
+
+	/* L1 and L2-hash the final block if needed */
+	if (partial) {
+		/* Zero-pad to next 128-bit boundary */
+		unsigned int n = round_up(partial, 16);
+		u64 rh, rl;
+
+		memset(&dctx->partial[partial], 0, n - partial);
+		nh_16(dctx->partial_words, tctx->nhkey, n / 8, rh, rl);
+		rh &= m62;
+		if (dctx->first_block_processed)
+			poly_step(ch, cl, tctx->polykey[0], tctx->polykey[1],
+				  rh, rl);
+		else
+			ADD128(ch, cl, rh, rl);
+	}
+
+	/* L3-hash the 128-bit output of L2-hash */
+	return l3hash(ch, cl, tctx->l3key[0], tctx->l3key[1], partial * 8);
+}
+
+static int vmac_final(struct shash_desc *desc, u8 *out)
+{
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
+	static const u8 nonce[16] = {}; /* TODO: this is insecure */
+	union {
+		u8 bytes[16];
+		__be64 pads[2];
+	} block;
+	int index;
+	u64 hash, pad;
+
+	/* Finish calculating the VHASH of the message */
+	hash = vhash_final(tctx, dctx);
+
+	/* Generate pseudorandom pad by encrypting the nonce */
+	memcpy(&block, nonce, 16);
+	index = block.bytes[15] & 1;
+	block.bytes[15] &= ~1;
+	crypto_cipher_encrypt_one(tctx->cipher, block.bytes, block.bytes);
+	pad = be64_to_cpu(block.pads[index]);
+
+	/* The VMAC is the sum of VHASH and the pseudorandom pad */
+	put_unaligned_le64(hash + pad, out);
 	return 0;
 }
 
 static int vmac_init_tfm(struct crypto_tfm *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm);
+	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctx->child = cipher;
+	tctx->cipher = cipher;
 	return 0;
 }
 
 static void vmac_exit_tfm(struct crypto_tfm *tfm)
 {
-	struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_cipher(ctx->child);
+	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(tctx->cipher);
 }
 
 static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -674,11 +627,12 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	inst->alg.digestsize = sizeof(vmac_t);
-	inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t);
+	inst->alg.base.cra_ctxsize = sizeof(struct vmac_tfm_ctx);
 	inst->alg.base.cra_init = vmac_init_tfm;
 	inst->alg.base.cra_exit = vmac_exit_tfm;
 
+	inst->alg.descsize = sizeof(struct vmac_desc_ctx);
+	inst->alg.digestsize = VMAC_TAG_LEN / 8;
 	inst->alg.init = vmac_init;
 	inst->alg.update = vmac_update;
 	inst->alg.final = vmac_final;
diff --git a/include/crypto/vmac.h b/include/crypto/vmac.h
deleted file mode 100644
index 6b700c7b2fe1..000000000000
--- a/include/crypto/vmac.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Modified to interface to the Linux kernel
- * Copyright (c) 2009, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#ifndef __CRYPTO_VMAC_H
-#define __CRYPTO_VMAC_H
-
-/* --------------------------------------------------------------------------
- * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
- * This implementation is herby placed in the public domain.
- * The authors offers no warranty. Use at your own risk.
- * Please send bug reports to the authors.
- * Last modified: 17 APR 08, 1700 PDT
- * ----------------------------------------------------------------------- */
-
-/*
- * User definable settings.
- */
-#define VMAC_TAG_LEN	64
-#define VMAC_KEY_SIZE	128/* Must be 128, 192 or 256			*/
-#define VMAC_KEY_LEN	(VMAC_KEY_SIZE/8)
-#define VMAC_NHBYTES	128/* Must 2^i for any 3 < i < 13 Standard = 128*/
-
-/*
- * This implementation uses u32 and u64 as names for unsigned 32-
- * and 64-bit integer types. These are defined in C99 stdint.h. The
- * following may need adaptation if you are not running a C99 or
- * Microsoft C environment.
- */
-struct vmac_ctx {
-	u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
-	u64 polykey[2*VMAC_TAG_LEN/64];
-	u64 l3key[2*VMAC_TAG_LEN/64];
-	u64 polytmp[2*VMAC_TAG_LEN/64];
-	u64 cached_nonce[2];
-	u64 cached_aes[2];
-	int first_block_processed;
-};
-
-typedef u64 vmac_t;
-
-struct vmac_ctx_t {
-	struct crypto_cipher *child;
-	struct vmac_ctx __vmac_ctx;
-	u8 partial[VMAC_NHBYTES];	/* partial block */
-	int partial_size;		/* size of the partial block */
-};
-
-#endif /* __CRYPTO_VMAC_H */

From a55a2512827fde609f4844be33d7e6dce7b2a4cd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 23 Jul 2018 10:54:57 -0700
Subject: [PATCH 322/519] crypto: blkcipher - fix crash flushing dcache in
 error path

commit 0868def3e4100591e7a1fdbf3eed1439cc8f7ca3 upstream.

Like the skcipher_walk case:

scatterwalk_done() is only meant to be called after a nonzero number of
bytes have been processed, since scatterwalk_pagedone() will flush the
dcache of the *previous* page.  But in the error case of
blkcipher_walk_done(), e.g. if the input wasn't an integer number of
blocks, scatterwalk_done() was actually called after advancing 0 bytes.
This caused a crash ("BUG: unable to handle kernel paging request")
during '!PageSlab(page)' on architectures like arm and arm64 that define
ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was
page-aligned as in that case walk->offset == 0.

Fix it by reorganizing blkcipher_walk_done() to skip the
scatterwalk_advance() and scatterwalk_done() if an error has occurred.

This bug was found by syzkaller fuzzing.

Reproducer, assuming ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE:

	#include <linux/if_alg.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main()
	{
		struct sockaddr_alg addr = {
			.salg_type = "skcipher",
			.salg_name = "ecb(aes-generic)",
		};
		char buffer[4096] __attribute__((aligned(4096))) = { 0 };
		int fd;

		fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
		bind(fd, (void *)&addr, sizeof(addr));
		setsockopt(fd, SOL_ALG, ALG_SET_KEY, buffer, 16);
		fd = accept(fd, NULL, NULL);
		write(fd, buffer, 15);
		read(fd, buffer, 15);
	}

Reported-by: Liu Chao <liuchao741@huawei.com>
Fixes: 5cde0af2a982 ("[CRYPTO] cipher: Added block cipher type")
Cc: <stable@vger.kernel.org> # v2.6.19+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/blkcipher.c | 54 ++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index dca7bc87dad9..2d08e59b3212 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -71,19 +71,18 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
 	return max(start, end_page);
 }
 
-static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk,
-					       unsigned int bsize)
+static inline void blkcipher_done_slow(struct blkcipher_walk *walk,
+				       unsigned int bsize)
 {
 	u8 *addr;
 
 	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
 	addr = blkcipher_get_spot(addr, bsize);
 	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
-	return bsize;
 }
 
-static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
-					       unsigned int n)
+static inline void blkcipher_done_fast(struct blkcipher_walk *walk,
+				       unsigned int n)
 {
 	if (walk->flags & BLKCIPHER_WALK_COPY) {
 		blkcipher_map_dst(walk);
@@ -97,49 +96,48 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
 
 	scatterwalk_advance(&walk->in, n);
 	scatterwalk_advance(&walk->out, n);
-
-	return n;
 }
 
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err)
 {
-	unsigned int nbytes = 0;
+	unsigned int n; /* bytes processed */
+	bool more;
 
-	if (likely(err >= 0)) {
-		unsigned int n = walk->nbytes - err;
+	if (unlikely(err < 0))
+		goto finish;
 
-		if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW)))
-			n = blkcipher_done_fast(walk, n);
-		else if (WARN_ON(err)) {
+	n = walk->nbytes - err;
+	walk->total -= n;
+	more = (walk->total != 0);
+
+	if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) {
+		blkcipher_done_fast(walk, n);
+	} else {
+		if (WARN_ON(err)) {
+			/* unexpected case; didn't process all bytes */
 			err = -EINVAL;
-			goto err;
-		} else
-			n = blkcipher_done_slow(walk, n);
-
-		nbytes = walk->total - n;
-		err = 0;
+			goto finish;
+		}
+		blkcipher_done_slow(walk, n);
 	}
 
-	scatterwalk_done(&walk->in, 0, nbytes);
-	scatterwalk_done(&walk->out, 1, nbytes);
+	scatterwalk_done(&walk->in, 0, more);
+	scatterwalk_done(&walk->out, 1, more);
 
-err:
-	walk->total = nbytes;
-	walk->nbytes = nbytes;
-
-	if (nbytes) {
+	if (more) {
 		crypto_yield(desc->flags);
 		return blkcipher_walk_next(desc, walk);
 	}
-
+	err = 0;
+finish:
+	walk->nbytes = 0;
 	if (walk->iv != desc->info)
 		memcpy(desc->info, walk->iv, walk->ivsize);
 	if (walk->buffer != walk->page)
 		kfree(walk->buffer);
 	if (walk->page)
 		free_page((unsigned long)walk->page);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_done);

From 930787c9cdd7179025f10ef45d9957f1ef38880b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 23 Jul 2018 10:54:58 -0700
Subject: [PATCH 323/519] crypto: ablkcipher - fix crash flushing dcache in
 error path

commit 318abdfbe708aaaa652c79fb500e9bd60521f9dc upstream.

Like the skcipher_walk and blkcipher_walk cases:

scatterwalk_done() is only meant to be called after a nonzero number of
bytes have been processed, since scatterwalk_pagedone() will flush the
dcache of the *previous* page.  But in the error case of
ablkcipher_walk_done(), e.g. if the input wasn't an integer number of
blocks, scatterwalk_done() was actually called after advancing 0 bytes.
This caused a crash ("BUG: unable to handle kernel paging request")
during '!PageSlab(page)' on architectures like arm and arm64 that define
ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, provided that the input was
page-aligned as in that case walk->offset == 0.

Fix it by reorganizing ablkcipher_walk_done() to skip the
scatterwalk_advance() and scatterwalk_done() if an error has occurred.

Reported-by: Liu Chao <liuchao741@huawei.com>
Fixes: bf06099db18a ("crypto: skcipher - Add ablkcipher_walk interfaces")
Cc: <stable@vger.kernel.org> # v2.6.35+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/ablkcipher.c | 57 +++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index e5b5721809e2..149e7a7f04fe 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -73,11 +73,9 @@ static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len)
 	return max(start, end_page);
 }
 
-static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk,
-						unsigned int bsize)
+static inline void ablkcipher_done_slow(struct ablkcipher_walk *walk,
+					unsigned int n)
 {
-	unsigned int n = bsize;
-
 	for (;;) {
 		unsigned int len_this_page = scatterwalk_pagelen(&walk->out);
 
@@ -89,17 +87,13 @@ static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk,
 		n -= len_this_page;
 		scatterwalk_start(&walk->out, sg_next(walk->out.sg));
 	}
-
-	return bsize;
 }
 
-static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk,
-						unsigned int n)
+static inline void ablkcipher_done_fast(struct ablkcipher_walk *walk,
+					unsigned int n)
 {
 	scatterwalk_advance(&walk->in, n);
 	scatterwalk_advance(&walk->out, n);
-
-	return n;
 }
 
 static int ablkcipher_walk_next(struct ablkcipher_request *req,
@@ -109,39 +103,40 @@ int ablkcipher_walk_done(struct ablkcipher_request *req,
 			 struct ablkcipher_walk *walk, int err)
 {
 	struct crypto_tfm *tfm = req->base.tfm;
-	unsigned int nbytes = 0;
+	unsigned int n; /* bytes processed */
+	bool more;
 
-	if (likely(err >= 0)) {
-		unsigned int n = walk->nbytes - err;
+	if (unlikely(err < 0))
+		goto finish;
 
-		if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW)))
-			n = ablkcipher_done_fast(walk, n);
-		else if (WARN_ON(err)) {
+	n = walk->nbytes - err;
+	walk->total -= n;
+	more = (walk->total != 0);
+
+	if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) {
+		ablkcipher_done_fast(walk, n);
+	} else {
+		if (WARN_ON(err)) {
+			/* unexpected case; didn't process all bytes */
 			err = -EINVAL;
-			goto err;
-		} else
-			n = ablkcipher_done_slow(walk, n);
-
-		nbytes = walk->total - n;
-		err = 0;
+			goto finish;
+		}
+		ablkcipher_done_slow(walk, n);
 	}
 
-	scatterwalk_done(&walk->in, 0, nbytes);
-	scatterwalk_done(&walk->out, 1, nbytes);
+	scatterwalk_done(&walk->in, 0, more);
+	scatterwalk_done(&walk->out, 1, more);
 
-err:
-	walk->total = nbytes;
-	walk->nbytes = nbytes;
-
-	if (nbytes) {
+	if (more) {
 		crypto_yield(req->base.flags);
 		return ablkcipher_walk_next(req, walk);
 	}
-
+	err = 0;
+finish:
+	walk->nbytes = 0;
 	if (walk->iv != req->info)
 		memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize);
 	kfree(walk->iv_buffer);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(ablkcipher_walk_done);

From 59e68641add22a2b6847b653f8e823ccb4042d0a Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@collabora.com>
Date: Fri, 8 Sep 2017 00:13:08 -0500
Subject: [PATCH 324/519] ASoC: Intel: cht_bsw_max98090_ti: Fix jack
 initialization

commit 3bbda5a38601f7675a214be2044e41d7749e6c7b upstream.

If the ts3a227e audio accessory detection hardware is present and its
driver probed, the jack needs to be created before enabling jack
detection in the ts3a227e driver. With this patch, the jack is
instantiated in the max98090 headset init function if the ts3a227e is
present. This fixes a null pointer dereference as the jack detection
enabling function in the ts3a driver was called before the jack is
created.

[minor correction to keep error handling on jack creation the same
as before by Pierre Bossart]

Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-By: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/intel/boards/cht_bsw_max98090_ti.c | 45 ++++++++++++++------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index 4e2fcf188dd1..01a573a063d1 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -131,23 +131,19 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime)
 	struct cht_mc_private *ctx = snd_soc_card_get_drvdata(runtime->card);
 	struct snd_soc_jack *jack = &ctx->jack;
 
-	/**
-	* TI supports 4 butons headset detection
-	* KEY_MEDIA
-	* KEY_VOICECOMMAND
-	* KEY_VOLUMEUP
-	* KEY_VOLUMEDOWN
-	*/
-	if (ctx->ts3a227e_present)
-		jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE |
-					SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-					SND_JACK_BTN_2 | SND_JACK_BTN_3;
-	else
-		jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE;
+	if (ctx->ts3a227e_present) {
+		/*
+		 * The jack has already been created in the
+		 * cht_max98090_headset_init() function.
+		 */
+		snd_soc_jack_notifier_register(jack, &cht_jack_nb);
+		return 0;
+	}
+
+	jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE;
 
 	ret = snd_soc_card_jack_new(runtime->card, "Headset Jack",
 					jack_type, jack, NULL, 0);
-
 	if (ret) {
 		dev_err(runtime->dev, "Headset Jack creation failed %d\n", ret);
 		return ret;
@@ -203,6 +199,27 @@ static int cht_max98090_headset_init(struct snd_soc_component *component)
 {
 	struct snd_soc_card *card = component->card;
 	struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card);
+	struct snd_soc_jack *jack = &ctx->jack;
+	int jack_type;
+	int ret;
+
+	/*
+	 * TI supports 4 butons headset detection
+	 * KEY_MEDIA
+	 * KEY_VOICECOMMAND
+	 * KEY_VOLUMEUP
+	 * KEY_VOLUMEDOWN
+	 */
+	jack_type = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE |
+		    SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+		    SND_JACK_BTN_2 | SND_JACK_BTN_3;
+
+	ret = snd_soc_card_jack_new(card, "Headset Jack", jack_type,
+				    jack, NULL, 0);
+	if (ret) {
+		dev_err(card->dev, "Headset Jack creation failed %d\n", ret);
+		return ret;
+	}
 
 	return ts3a227e_enable_jack_detect(component, &ctx->jack);
 }

From 17c1e0b1f6a161cc4f533d4869ff574273dbfe8d Mon Sep 17 00:00:00 2001
From: Mark Salyzyn <salyzyn@android.com>
Date: Tue, 31 Jul 2018 15:02:13 -0700
Subject: [PATCH 325/519] Bluetooth: hidp: buffer overflow in
 hidp_process_report

commit 7992c18810e568b95c869b227137a2215702a805 upstream.

CVE-2018-9363

The buffer length is unsigned at all layers, but gets cast to int and
checked in hidp_process_report and can lead to a buffer overflow.
Switch len parameter to unsigned int to resolve issue.

This affects 3.18 and newer kernels.

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Fixes: a4b1b5877b514b276f0f31efe02388a9c2836728 ("HID: Bluetooth: hidp: make sure input buffers are big enough")
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-bluetooth@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: security@kernel.org
Cc: kernel-team@android.com
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/hidp/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 1fc076420d1e..1811f8e7ddf4 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -431,8 +431,8 @@ static void hidp_del_timer(struct hidp_session *session)
 		del_timer(&session->timer);
 }
 
-static void hidp_process_report(struct hidp_session *session,
-				int type, const u8 *data, int len, int intr)
+static void hidp_process_report(struct hidp_session *session, int type,
+				const u8 *data, unsigned int len, int intr)
 {
 	if (len > HID_MAX_BUFFER_SIZE)
 		len = HID_MAX_BUFFER_SIZE;

From 29f475cbff9b56c83821344b02d5152847839bc2 Mon Sep 17 00:00:00 2001
From: Chintan Pandya <cpandya@codeaurora.org>
Date: Wed, 27 Jun 2018 08:13:47 -0600
Subject: [PATCH 326/519] ioremap: Update pgtable free interfaces with addr

commit 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc upstream.

The following kernel panic was observed on ARM64 platform due to a stale
TLB entry.

 1. ioremap with 4K size, a valid pte page table is set.
 2. iounmap it, its pte entry is set to 0.
 3. ioremap the same address with 2M size, update its pmd entry with
    a new value.
 4. CPU may hit an exception because the old pmd entry is still in TLB,
    which leads to a kernel panic.

Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
table") has addressed this panic by falling to pte mappings in the above
case on ARM64.

To support pmd mappings in all cases, TLB purge needs to be performed
in this case on ARM64.

Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
so that TLB purge can be added later in seprate patches.

[toshi.kani@hpe.com: merge changes, rewrite patch description]
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/mm/mmu.c           |  4 ++--
 arch/x86/mm/pgtable.c         | 12 +++++++-----
 include/asm-generic/pgtable.h |  8 ++++----
 lib/ioremap.c                 |  4 ++--
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 51ac84e0812d..e9d96b028766 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -699,12 +699,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 }
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
 
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8e0378a00d50..8f5ef9d39555 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -680,11 +680,12 @@ int pmd_clear_huge(pmd_t *pmd)
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
  *
  * Context: The pud range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	pmd_t *pmd;
 	int i;
@@ -695,7 +696,7 @@ int pud_free_pmd_page(pud_t *pud)
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
 
 	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i]))
+		if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
 			return 0;
 
 	pud_clear(pud);
@@ -707,11 +708,12 @@ int pud_free_pmd_page(pud_t *pud)
 /**
  * pmd_free_pte_page - Clear pmd entry and free pte page.
  * @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
  *
  * Context: The pmd range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
 
@@ -727,7 +729,7 @@ int pmd_free_pte_page(pmd_t *pmd)
 
 #else /* !CONFIG_X86_64 */
 
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
@@ -736,7 +738,7 @@ int pud_free_pmd_page(pud_t *pud)
  * Disable free page handling on x86-PAE. This assures that ioremap()
  * does not update sync'd pmd entries. See vmalloc_sync_one().
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index dabecb661264..53a47d75cc43 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -770,8 +770,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 {
@@ -789,11 +789,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
 	return 0;
 }
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return 0;
 }
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return 0;
 }
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 5323b59ca393..b9462037868d 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -84,7 +84,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
-		    pmd_free_pte_page(pmd)) {
+		    pmd_free_pte_page(pmd, addr)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -111,7 +111,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
-		    pud_free_pmd_page(pud)) {
+		    pud_free_pmd_page(pud, addr)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}

From 5b9b4a8cca9c9fd9a035edbbe6eea3d1cf687981 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Wed, 27 Jun 2018 08:13:48 -0600
Subject: [PATCH 327/519] x86/mm: Add TLB purge to free pmd/pte page interfaces

commit 5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e upstream.

ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates
a pud / pmd map.  The following preconditions are met at their entry.
 - All pte entries for a target pud/pmd address range have been cleared.
 - System-wide TLB purges have been peformed for a target pud/pmd address
   range.

The preconditions assure that there is no stale TLB entry for the range.
Speculation may not cache TLB entries since it requires all levels of page
entries, including ptes, to have P & A-bits set for an associated address.
However, speculation may cache pud/pmd entries (paging-structure caches)
when they have P-bit set.

Add a system-wide TLB purge (INVLPG) to a single page after clearing
pud/pmd entry's P-bit.

SDM 4.10.4.1, Operation that Invalidate TLBs and Paging-Structure Caches,
states that:
  INVLPG invalidates all paging-structure caches associated with the
  current PCID regardless of the liner addresses to which they correspond.

Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: cpandya@codeaurora.org
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Joerg Roedel <joro@8bytes.org>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-4-toshi.kani@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8f5ef9d39555..55c7446311a7 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -682,24 +682,44 @@ int pmd_clear_huge(pmd_t *pmd)
  * @pud: Pointer to a PUD.
  * @addr: Virtual address associated with pud.
  *
- * Context: The pud range has been unmaped and TLB purged.
+ * Context: The pud range has been unmapped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ *
+ * NOTE: Callers must allow a single page allocation.
  */
 int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
-	pmd_t *pmd;
+	pmd_t *pmd, *pmd_sv;
+	pte_t *pte;
 	int i;
 
 	if (pud_none(*pud))
 		return 1;
 
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
+	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
+	if (!pmd_sv)
+		return 0;
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
-			return 0;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd_sv[i] = pmd[i];
+		if (!pmd_none(pmd[i]))
+			pmd_clear(&pmd[i]);
+	}
 
 	pud_clear(pud);
+
+	/* INVLPG to clear all paging-structure caches */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd_sv[i])) {
+			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
+			free_page((unsigned long)pte);
+		}
+	}
+
+	free_page((unsigned long)pmd_sv);
 	free_page((unsigned long)pmd);
 
 	return 1;
@@ -710,7 +730,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
  * @pmd: Pointer to a PMD.
  * @addr: Virtual address associated with pmd.
  *
- * Context: The pmd range has been unmaped and TLB purged.
+ * Context: The pmd range has been unmapped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
 int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
@@ -722,6 +742,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 
 	pte = (pte_t *)pmd_page_vaddr(*pmd);
 	pmd_clear(pmd);
+
+	/* INVLPG to clear all paging-structure caches */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
+
 	free_page((unsigned long)pte);
 
 	return 1;

From 45cf1802a1057650768430cf3168ff7a02163338 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 17 Aug 2018 20:56:45 +0200
Subject: [PATCH 328/519] Linux 4.4.149

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index cacb45c3173a..e7c46ece5f27 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 148
+SUBLEVEL = 149
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 4cdedeefa38f45299b18ae692426d5baaff6b785 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Fri, 17 Aug 2018 10:27:36 -0700
Subject: [PATCH 329/519] x86/speculation/l1tf: Exempt zeroed PTEs from
 inversion

commit f19f5c49bbc3ffcc9126cc245fc1b24cc29f4a37 upstream.

It turns out that we should *not* invert all not-present mappings,
because the all zeroes case is obviously special.

clear_page() does not undergo the XOR logic to invert the address bits,
i.e. PTE, PMD and PUD entries that have not been individually written
will have val=0 and so will trigger __pte_needs_invert(). As a result,
{pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones
(adjusted by the max PFN mask) instead of zero. A zeroed entry is ok
because the page at physical address 0 is reserved early in boot
specifically to mitigate L1TF, so explicitly exempt them from the
inversion when reading the PFN.

Manifested as an unexpected mprotect(..., PROT_NONE) failure when called
on a VMA that has VM_PFNMAP and was mmap'd to as something other than
PROT_NONE but never used. mprotect() sends the PROT_NONE request down
prot_none_walk(), which walks the PTEs to check the PFNs.
prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns
-EACCES because it thinks mprotect() is trying to adjust a high MMIO
address.

[ This is a very modified version of Sean's original patch, but all
  credit goes to Sean for doing this and also pointing out that
  sometimes the __pte_needs_invert() function only gets the protection
  bits, not the full eventual pte.  But zero remains special even in
  just protection bits, so that's ok.   - Linus ]

Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable-invert.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index 44b1203ece12..a0c1525f1b6f 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -4,9 +4,18 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * A clear pte value is special, and doesn't get inverted.
+ *
+ * Note that even users that only pass a pgprot_t (rather
+ * than a full pte) won't trigger the special zero case,
+ * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
+ * set. So the all zero case really is limited to just the
+ * cleared page table entry case.
+ */
 static inline bool __pte_needs_invert(u64 val)
 {
-	return !(val & _PAGE_PRESENT);
+	return val && !(val & _PAGE_PRESENT);
 }
 
 /* Get a mask to xor with the page table entry to get the correct pfn. */

From 7dc18ebc3101229d5238a2dc740804cd4836b383 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 18 Aug 2018 10:45:38 +0200
Subject: [PATCH 330/519] Linux 4.4.150

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e7c46ece5f27..7789195c6a59 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 149
+SUBLEVEL = 150
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From f35e16c5977914d8c2e77277d554268b34bb4942 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Tue, 7 Aug 2018 20:03:57 +0300
Subject: [PATCH 331/519] dccp: fix undefined behavior with 'cwnd' shift in
 ccid2_cwnd_restart()

[ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ]

The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value
can lead to undefined behavior [1].

In order to fix this use a gradual shift of the window with a 'while'
loop, similar to what tcp_cwnd_restart() is doing.

When comparing delta and RTO there is a minor difference between TCP
and DCCP, the last one also invokes dccp_cwnd_restart() and reduces
'cwnd' if delta equals RTO. That case is preserved in this change.

[1]:
[40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7
[40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int'
[40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G        W   E     4.18.0-rc7.x86_64 #1
...
[40851.377176] Call Trace:
[40851.408503]  dump_stack+0xf1/0x17b
[40851.451331]  ? show_regs_print_info+0x5/0x5
[40851.503555]  ubsan_epilogue+0x9/0x7c
[40851.548363]  __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4
[40851.617109]  ? __ubsan_handle_load_invalid_value+0x18f/0x18f
[40851.686796]  ? xfrm4_output_finish+0x80/0x80
[40851.739827]  ? lock_downgrade+0x6d0/0x6d0
[40851.789744]  ? xfrm4_prepare_output+0x160/0x160
[40851.845912]  ? ip_queue_xmit+0x810/0x1db0
[40851.895845]  ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
[40851.963530]  ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
[40852.029063]  dccp_xmit_packet+0x1d3/0x720 [dccp]
[40852.086254]  dccp_write_xmit+0x116/0x1d0 [dccp]
[40852.142412]  dccp_sendmsg+0x428/0xb20 [dccp]
[40852.195454]  ? inet_dccp_listen+0x200/0x200 [dccp]
[40852.254833]  ? sched_clock+0x5/0x10
[40852.298508]  ? sched_clock+0x5/0x10
[40852.342194]  ? inet_create+0xdf0/0xdf0
[40852.388988]  sock_sendmsg+0xd9/0x160
...

Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dccp/ccids/ccid2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 86a2ed0fb219..161dfcf86126 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	u32 cwnd = hc->tx_cwnd, restart_cwnd,
 	    iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
+	s32 delta = now - hc->tx_lsndtime;
 
 	hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
 
 	/* don't reduce cwnd below the initial window (IW) */
 	restart_cwnd = min(cwnd, iwnd);
-	cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
-	hc->tx_cwnd = max(cwnd, restart_cwnd);
 
+	while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
+		cwnd >>= 1;
+	hc->tx_cwnd = max(cwnd, restart_cwnd);
 	hc->tx_cwnd_stamp = now;
 	hc->tx_cwnd_used  = 0;
 

From 4aef9b0fffd2295b1c523ebf43ca6b46e9cc8ffa Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Fri, 10 Aug 2018 11:14:56 -0700
Subject: [PATCH 332/519] l2tp: use sk_dst_check() to avoid race on
 sk->sk_dst_cache

[ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ]

In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a
UDP socket. User could call sendmsg() on both this tunnel and the UDP
socket itself concurrently. As l2tp_xmit_skb() holds socket lock and call
__sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is
lockless and call sk_dst_check() to refresh sk->sk_dst_cache, there
could be a race and cause the dst cache to be freed multiple times.
So we fix l2tp side code to always call sk_dst_check() to garantee
xchg() is called when refreshing sk->sk_dst_cache to avoid race
conditions.

Syzkaller reported stack trace:
BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline]
BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline]
BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline]
BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029
Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829

CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_address_description+0x6c/0x20b mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272
 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
 atomic_fetch_add_unless include/linux/atomic.h:575 [inline]
 atomic_add_unless include/linux/atomic.h:597 [inline]
 dst_hold_safe include/net/dst.h:308 [inline]
 ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029
 rt6_get_pcpu_route net/ipv6/route.c:1249 [inline]
 ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922
 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098
 fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122
 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126
 ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978
 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079
 ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117
 udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:632
 ___sys_sendmsg+0x51d/0x930 net/socket.c:2115
 __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210
 __do_sys_sendmmsg net/socket.c:2239 [inline]
 __se_sys_sendmmsg net/socket.c:2236 [inline]
 __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446a29
Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29
RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003
RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c
R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001

Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid")
Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Guillaume Nault <g.nault@alphalink.fr>
Cc: David Ahern <dsahern@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 92df832a1896..591d18785285 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1145,7 +1145,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
+	skb_dst_set(skb, sk_dst_check(sk, 0));
 
 	inet = inet_sk(sk);
 	fl = &inet->cork.fl;

From 813fb06fe60d0a56a5481fb81793142fc00bf4bd Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 7 Aug 2018 12:41:38 -0700
Subject: [PATCH 333/519] llc: use refcount_inc_not_zero() for llc_sap_find()

[ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ]

llc_sap_put() decreases the refcnt before deleting sap
from the global list. Therefore, there is a chance
llc_sap_find() could find a sap with zero refcnt
in this global list.

Close this race condition by checking if refcnt is zero
or not in llc_sap_find(), if it is zero then it is being
removed so we can just treat it as gone.

Reported-by: <syzbot+278893f3f7803871f7ce@syzkaller.appspotmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/llc.h  | 5 +++++
 net/llc/llc_core.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index e8e61d4fb458..82d989995d18 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct llc_sap *sap)
 	atomic_inc(&sap->refcnt);
 }
 
+static inline bool llc_sap_hold_safe(struct llc_sap *sap)
+{
+	return atomic_inc_not_zero(&sap->refcnt);
+}
+
 void llc_sap_close(struct llc_sap *sap);
 
 static inline void llc_sap_put(struct llc_sap *sap)
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 842851cef698..e896a2c53b12 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value)
 
 	rcu_read_lock_bh();
 	sap = __llc_sap_find(sap_value);
-	if (sap)
-		llc_sap_hold(sap);
+	if (!sap || !llc_sap_hold_safe(sap))
+		sap = NULL;
 	rcu_read_unlock_bh();
 	return sap;
 }

From 0adfdb9af8ec7cc0c8a35dfe8a20d6f8d27ddd08 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 13 Aug 2018 18:44:04 +0800
Subject: [PATCH 334/519] net_sched: Fix missing res info when create new
 tc_index filter

[ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ]

Li Shuang reported the following warn:

[  733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq]
[  733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l
[  733.574155]  syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
[  733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131
[  733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016
[  733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq]
[  733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84
[  733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202
[  733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f
[  733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800
[  733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000
[  733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001
[  733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200
[  733.681430] FS:  00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000
[  733.690456] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0
[  733.704826] Call Trace:
[  733.707554]  cbq_destroy+0xa1/0xd0 [sch_cbq]
[  733.712318]  qdisc_destroy+0x62/0x130
[  733.716401]  dsmark_destroy+0x2a/0x70 [sch_dsmark]
[  733.721745]  qdisc_destroy+0x62/0x130
[  733.725829]  qdisc_graft+0x3ba/0x470
[  733.729817]  tc_get_qdisc+0x2a6/0x2c0
[  733.733901]  ? cred_has_capability+0x7d/0x130
[  733.738761]  rtnetlink_rcv_msg+0x263/0x2d0
[  733.743330]  ? rtnl_calcit.isra.30+0x110/0x110
[  733.748287]  netlink_rcv_skb+0x4d/0x130
[  733.752576]  netlink_unicast+0x1a3/0x250
[  733.756949]  netlink_sendmsg+0x2ae/0x3a0
[  733.761324]  sock_sendmsg+0x36/0x40
[  733.765213]  ___sys_sendmsg+0x26f/0x2d0
[  733.769493]  ? handle_pte_fault+0x586/0xdf0
[  733.774158]  ? __handle_mm_fault+0x389/0x500
[  733.778919]  ? __sys_sendmsg+0x5e/0xa0
[  733.783099]  __sys_sendmsg+0x5e/0xa0
[  733.787087]  do_syscall_64+0x5b/0x180
[  733.791171]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  733.796805] RIP: 0033:0x7f9117f23f10
[  733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8
[  733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10
[  733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003
[  733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003
[  733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000
[  733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000
[  733.870121] ---[ end trace 28edd4aad712ddca ]---

This is because we didn't update f->result.res when create new filter. Then in
tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res
and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class().

Fix it by updating f->result.res when create new filter.

Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/cls_tcindex.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 403746b20263..e3b48ddadb45 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -398,6 +398,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		struct tcindex_filter *nfp;
 		struct tcindex_filter __rcu **fp;
 
+		f->result.res = r->res;
 		tcf_exts_change(tp, &f->result.exts, &r->exts);
 
 		fp = cp->h + (handle % cp->hash);

From 62209d1f272c2b134765301f279fda7364b07ddd Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Aug 2018 11:06:02 -0700
Subject: [PATCH 335/519] vsock: split dwork to avoid reinitializations

[ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ]

syzbot reported that we reinitialize an active delayed
work in vsock_stream_connect():

	ODEBUG: init active (active state 0) object type: timer_list hint:
	delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414
	WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329
	debug_print_object+0x16a/0x210 lib/debugobjects.c:326

The pattern is apparently wrong, we should only initialize
the dealyed work once and could repeatly schedule it. So we
have to move out the initializations to allocation side.
And to avoid confusion, we can split the shared dwork
into two, instead of re-using the same one.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Reported-by: <syzbot+8a9b1bd330476a4f3db6@syzkaller.appspotmail.com>
Cc: Andy king <acking@vmware.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/af_vsock.h         |  4 ++--
 net/vmw_vsock/af_vsock.c       | 15 ++++++++-------
 net/vmw_vsock/vmci_transport.c |  3 +--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index e9eb2d6791b3..f7a35fcaaaf6 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -62,7 +62,8 @@ struct vsock_sock {
 	struct list_head pending_links;
 	struct list_head accept_queue;
 	bool rejected;
-	struct delayed_work dwork;
+	struct delayed_work connect_work;
+	struct delayed_work pending_work;
 	u32 peer_shutdown;
 	bool sent_request;
 	bool ignore_connecting_rst;
@@ -73,7 +74,6 @@ struct vsock_sock {
 
 s64 vsock_stream_has_data(struct vsock_sock *vsk);
 s64 vsock_stream_has_space(struct vsock_sock *vsk);
-void vsock_pending_work(struct work_struct *work);
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 60324f7c72bd..7f1d166ce612 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -430,14 +430,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode)
 	return transport->shutdown(vsock_sk(sk), mode);
 }
 
-void vsock_pending_work(struct work_struct *work)
+static void vsock_pending_work(struct work_struct *work)
 {
 	struct sock *sk;
 	struct sock *listener;
 	struct vsock_sock *vsk;
 	bool cleanup;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, pending_work.work);
 	sk = sk_vsock(vsk);
 	listener = vsk->listener;
 	cleanup = true;
@@ -477,7 +477,6 @@ void vsock_pending_work(struct work_struct *work)
 	sock_put(sk);
 	sock_put(listener);
 }
-EXPORT_SYMBOL_GPL(vsock_pending_work);
 
 /**** SOCKET OPERATIONS ****/
 
@@ -576,6 +575,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 	return retval;
 }
 
+static void vsock_connect_timeout(struct work_struct *work);
+
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
@@ -618,6 +619,8 @@ struct sock *__vsock_create(struct net *net,
 	vsk->sent_request = false;
 	vsk->ignore_connecting_rst = false;
 	vsk->peer_shutdown = 0;
+	INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
+	INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
 
 	psk = parent ? vsock_sk(parent) : NULL;
 	if (parent) {
@@ -1094,7 +1097,7 @@ static void vsock_connect_timeout(struct work_struct *work)
 	struct sock *sk;
 	struct vsock_sock *vsk;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, connect_work.work);
 	sk = sk_vsock(vsk);
 
 	lock_sock(sk);
@@ -1195,9 +1198,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 			 * timeout fires.
 			 */
 			sock_hold(sk);
-			INIT_DELAYED_WORK(&vsk->dwork,
-					  vsock_connect_timeout);
-			schedule_delayed_work(&vsk->dwork, timeout);
+			schedule_delayed_work(&vsk->connect_work, timeout);
 
 			/* Skip ahead to preserve error code set above. */
 			goto out_wait;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 662bdd20a748..589c8b9908a5 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1099,8 +1099,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vpending->listener = sk;
 	sock_hold(sk);
 	sock_hold(pending);
-	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
-	schedule_delayed_work(&vpending->dwork, HZ);
+	schedule_delayed_work(&vpending->pending_work, HZ);
 
 out:
 	return err;

From 51f6a134cfd1731baccecd49b8221833f782e4c9 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 13 Aug 2018 18:44:03 +0800
Subject: [PATCH 336/519] net_sched: fix NULL pointer dereference when delete
 tcindex filter

[ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ]

Li Shuang reported the following crash:

[   71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[   71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0
[   71.284127] Oops: 0000 [#1] SMP PTI
[   71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131
[   71.295686] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016
[   71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex]
[   71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00
[   71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282
[   71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e
[   71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800
[   71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000
[   71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7
[   71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600
[   71.377161] FS:  00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000
[   71.386188] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0
[   71.400558] Call Trace:
[   71.403299]  tcindex_destroy_element+0x25/0x40 [cls_tcindex]
[   71.409611]  tcindex_walk+0xbb/0x110 [cls_tcindex]
[   71.414953]  tcindex_destroy+0x44/0x90 [cls_tcindex]
[   71.420492]  ? tcindex_delete+0x280/0x280 [cls_tcindex]
[   71.426323]  tcf_proto_destroy+0x16/0x40
[   71.430696]  tcf_chain_flush+0x51/0x70
[   71.434876]  tcf_block_put_ext.part.30+0x8f/0x1b0
[   71.440122]  tcf_block_put+0x4d/0x70
[   71.444108]  cbq_destroy+0x4d/0xd0 [sch_cbq]
[   71.448869]  qdisc_destroy+0x62/0x130
[   71.452951]  dsmark_destroy+0x2a/0x70 [sch_dsmark]
[   71.458300]  qdisc_destroy+0x62/0x130
[   71.462373]  qdisc_graft+0x3ba/0x470
[   71.466359]  tc_get_qdisc+0x2a6/0x2c0
[   71.470443]  ? cred_has_capability+0x7d/0x130
[   71.475307]  rtnetlink_rcv_msg+0x263/0x2d0
[   71.479875]  ? rtnl_calcit.isra.30+0x110/0x110
[   71.484832]  netlink_rcv_skb+0x4d/0x130
[   71.489109]  netlink_unicast+0x1a3/0x250
[   71.493482]  netlink_sendmsg+0x2ae/0x3a0
[   71.497859]  sock_sendmsg+0x36/0x40
[   71.501748]  ___sys_sendmsg+0x26f/0x2d0
[   71.506029]  ? handle_pte_fault+0x586/0xdf0
[   71.510694]  ? __handle_mm_fault+0x389/0x500
[   71.515457]  ? __sys_sendmsg+0x5e/0xa0
[   71.519636]  __sys_sendmsg+0x5e/0xa0
[   71.523626]  do_syscall_64+0x5b/0x180
[   71.527711]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   71.533345] RIP: 0033:0x7f9d3e257f10
[   71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8
[   71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[   71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10
[   71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003
[   71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003
[   71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000
[   71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000
[   71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni
[   71.685425]  libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
[   71.697075] CR2: 0000000000000004
[   71.700792] ---[ end trace f604eb1acacd978b ]---

Reproducer:
tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index
tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2
tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64
tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 10
tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on
tc qdisc add dev lo parent 2:1 pfifo limit 5
tc qdisc del dev lo root

This is because in tcindex_set_parms, when there is no old_r, we set new
exts to cr.exts. And we didn't set it to filter when r == &new_filter_result.

Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer
dereference as we didn't init exts.

Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check.
Then we don't need "cr" as there is no errout after that.

Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/cls_tcindex.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index e3b48ddadb45..040d853f48b9 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -382,16 +382,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		tcf_bind_filter(tp, &cr.res, base);
 	}
 
-	if (old_r)
-		tcf_exts_change(tp, &r->exts, &e);
-	else
-		tcf_exts_change(tp, &cr.exts, &e);
-
 	if (old_r && old_r != r)
 		tcindex_filter_result_init(old_r);
 
 	oldp = p;
 	r->res = cr.res;
+	tcf_exts_change(tp, &r->exts, &e);
+
 	rcu_assign_pointer(tp->root, cp);
 
 	if (r == &new_filter_result) {

From 1b692b786b0c9b43959c7627fae2985fda1bed56 Mon Sep 17 00:00:00 2001
From: Park Ju Hyung <qkrwngud825@gmail.com>
Date: Sat, 28 Jul 2018 03:16:42 +0900
Subject: [PATCH 337/519] ALSA: hda - Sleep for 10ms after entering D3 on
 Conexant codecs

commit f59cf9a0551dd954ad8b752461cf19d9789f4b1d upstream.

On rare occasions, we are still noticing that the internal speaker
spitting out spurious noises even after adding the problematic codec
to the list.

Adding a 10ms artificial delay before rebooting fixes the issue entirely.

Patch for Realtek codecs also adds the same amount of delay after
entering D3.

Signed-off-by: Park Ju Hyung <qkrwngud825@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index cb19af145f46..fa9f85c1c6fb 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -219,6 +219,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 	snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3);
 	snd_hda_codec_write(codec, codec->core.afg, 0,
 			    AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+	msleep(10);
 }
 
 static void cx_auto_free(struct hda_codec *codec)

From caf8fe5173f9bd20b14612c6a44221707f8a628d Mon Sep 17 00:00:00 2001
From: Park Ju Hyung <qkrwngud825@gmail.com>
Date: Sat, 28 Jul 2018 03:16:21 +0900
Subject: [PATCH 338/519] ALSA: hda - Turn CX8200 into D3 as well upon reboot

commit d77a4b4a5b0b2ebcbc9840995d91311ef28302ab upstream.

As an equivalent codec with CX20724,
CX8200 is also subject to the reboot bug.

Late 2017 and 2018 LG Gram and some HP Spectre laptops are known victims
to this issue, causing extremely loud noises upon reboot.

Now that we know that this bug is subject to multiple codecs,
fix the comment as well.

Signed-off-by: Park Ju Hyung <qkrwngud825@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index fa9f85c1c6fb..a1a3ce8c3f56 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -205,6 +205,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 	struct conexant_spec *spec = codec->spec;
 
 	switch (codec->core.vendor_id) {
+	case 0x14f12008: /* CX8200 */
 	case 0x14f150f2: /* CX20722 */
 	case 0x14f150f4: /* CX20724 */
 		break;
@@ -212,7 +213,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 		return;
 	}
 
-	/* Turn the CX20722 codec into D3 to avoid spurious noises
+	/* Turn the problematic codec into D3 to avoid spurious noises
 	   from the internal speaker during (and after) reboot */
 	cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false);
 

From c48a18584d81b2fbdc1d7deba41525bfc9114255 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 17:10:11 +0200
Subject: [PATCH 339/519] ALSA: vx222: Fix invalid endian conversions

commit fff71a4c050ba46e305d910c837b99ba1728135e upstream.

The endian conversions used in vx2_dma_read() and vx2_dma_write() are
superfluous and even wrong on big-endian machines, as inl() and outl()
already do conversions.  Kill them.

Spotted by sparse, a warning like:
  sound/pci/vx222/vx222_ops.c:278:30: warning: incorrect type in argument 1 (different base types)

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/vx222/vx222_ops.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c
index 8e457ea27f89..1997bb048d8b 100644
--- a/sound/pci/vx222/vx222_ops.c
+++ b/sound/pci/vx222/vx222_ops.c
@@ -275,7 +275,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 2; /* in 32bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--) {
-			outl(cpu_to_le32(*addr), port);
+			outl(*addr, port);
 			addr++;
 		}
 		addr = (u32 *)runtime->dma_area;
@@ -285,7 +285,7 @@ static void vx2_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 2; /* in 32bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 0; count--) {
-		outl(cpu_to_le32(*addr), port);
+		outl(*addr, port);
 		addr++;
 	}
 
@@ -313,7 +313,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 2; /* in 32bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--)
-			*addr++ = le32_to_cpu(inl(port));
+			*addr++ = inl(port);
 		addr = (u32 *)runtime->dma_area;
 		pipe->hw_ptr = 0;
 	}
@@ -321,7 +321,7 @@ static void vx2_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 2; /* in 32bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 0; count--)
-		*addr++ = le32_to_cpu(inl(port));
+		*addr++ = inl(port);
 
 	vx2_release_pseudo_dma(chip);
 }

From 8419b74a43fe4d6a3a4a6d33b00746f5446fb472 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 26 Jul 2018 14:27:59 +0200
Subject: [PATCH 340/519] ALSA: virmidi: Fix too long output trigger loop

commit 50e9ffb1996a5d11ff5040a266585bad4ceeca0a upstream.

The virmidi output trigger tries to parse the all available bytes and
process sequencer events as much as possible.  In a normal situation,
this is supposed to be relatively short, but a program may give a huge
buffer and it'll take a long time in a single spin lock, which may
eventually lead to a soft lockup.

This patch simply adds a workaround, a cond_resched() call in the loop
if applicable.  A better solution would be to move the event processor
into a work, but let's put a duct-tape quickly at first.

Reported-and-tested-by: Dae R. Jeong <threeearcat@gmail.com>
Reported-by: syzbot+619d9f40141d826b097e@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/seq/seq_virmidi.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index ef494ffc1369..975a7c939d2f 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -163,6 +163,7 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
 	int count, res;
 	unsigned char buf[32], *pbuf;
 	unsigned long flags;
+	bool check_resched = !in_atomic();
 
 	if (up) {
 		vmidi->trigger = 1;
@@ -200,6 +201,15 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
 					vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
 				}
 			}
+			if (!check_resched)
+				continue;
+			/* do temporary unlock & cond_resched() for avoiding
+			 * CPU soft lockup, which may happen via a write from
+			 * a huge rawmidi buffer
+			 */
+			spin_unlock_irqrestore(&substream->runtime->lock, flags);
+			cond_resched();
+			spin_lock_irqsave(&substream->runtime->lock, flags);
 		}
 	out:
 		spin_unlock_irqrestore(&substream->runtime->lock, flags);

From dbc8ab895259cc04ccd3406ccf4280418cd75b6b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 17:59:26 +0200
Subject: [PATCH 341/519] ALSA: cs5535audio: Fix invalid endian conversion

commit 69756930f2de0457d51db7d505a1e4f40e9fd116 upstream.

One place in cs5535audio_build_dma_packets() does an extra conversion
via cpu_to_le32(); namely jmpprd_addr is passed to setup_prd() ops,
which writes the value via cs_writel().  That is, the callback does
the conversion by itself, and we don't need to convert beforehand.

This patch fixes that bogus conversion.

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/cs5535audio/cs5535audio.h     | 6 +++---
 sound/pci/cs5535audio/cs5535audio_pcm.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h
index 0579daa62215..425d1b664029 100644
--- a/sound/pci/cs5535audio/cs5535audio.h
+++ b/sound/pci/cs5535audio/cs5535audio.h
@@ -66,9 +66,9 @@ struct cs5535audio_dma_ops {
 };
 
 struct cs5535audio_dma_desc {
-	u32 addr;
-	u16 size;
-	u16 ctlreserved;
+	__le32 addr;
+	__le16 size;
+	__le16 ctlreserved;
 };
 
 struct cs5535audio_dma {
diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c
index 9c2dc911d8d7..709f1c584d3e 100644
--- a/sound/pci/cs5535audio/cs5535audio_pcm.c
+++ b/sound/pci/cs5535audio/cs5535audio_pcm.c
@@ -158,8 +158,8 @@ static int cs5535audio_build_dma_packets(struct cs5535audio *cs5535au,
 	lastdesc->addr = cpu_to_le32((u32) dma->desc_buf.addr);
 	lastdesc->size = 0;
 	lastdesc->ctlreserved = cpu_to_le16(PRD_JMP);
-	jmpprd_addr = cpu_to_le32(lastdesc->addr +
-				  (sizeof(struct cs5535audio_dma_desc)*periods));
+	jmpprd_addr = (u32)dma->desc_buf.addr +
+		sizeof(struct cs5535audio_dma_desc) * periods;
 
 	dma->substream = substream;
 	dma->period_bytes = period_bytes;

From ad76ea373eb5f1232767242076ffef8b23518602 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 2 Aug 2018 14:04:45 +0200
Subject: [PATCH 342/519] ALSA: hda: Correct Asrock B85M-ITX power_save
 blacklist entry

commit 8e82a728792bf66b9f0a29c9d4c4b0630f7b9c79 upstream.

I added the subsys product-id for the HDMI HDA device rather then for
the PCH one, this commit fixes this.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1525104
Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index d0b55c866370..cabccb10210e 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2069,7 +2069,7 @@ static int azx_probe(struct pci_dev *pci,
  */
 static struct snd_pci_quirk power_save_blacklist[] = {
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
-	SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
+	SND_PCI_QUIRK(0x1849, 0xc892, "Asrock B85M-ITX", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */

From dd65a18b208fb2e03824ecf044e41c74b74423c1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 19 Jul 2018 11:01:04 +0200
Subject: [PATCH 343/519] ALSA: memalloc: Don't exceed over the requested size

commit dfef01e150824b0e6da750cacda8958188d29aea upstream.

snd_dma_alloc_pages_fallback() tries to allocate pages again when the
allocation fails with reduced size.  But the first try actually
*increases* the size to power-of-two, which may give back a larger
chunk than the requested size.  This confuses the callers, e.g. sgbuf
assumes that the size is equal or less, and it may result in a bad
loop due to the underflow and eventually lead to Oops.

The code of this function seems incorrectly assuming the usage of
get_order().  We need to decrease at first, then align to
power-of-two.

Reported-and-tested-by: he, bo <bo.he@intel.com>
Reported-by: zhang jun <jun.zhang@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/memalloc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index f05cb6a8cbe0..78ffe445d775 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -239,16 +239,12 @@ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size,
 	int err;
 
 	while ((err = snd_dma_alloc_pages(type, device, size, dmab)) < 0) {
-		size_t aligned_size;
 		if (err != -ENOMEM)
 			return err;
 		if (size <= PAGE_SIZE)
 			return -ENOMEM;
-		aligned_size = PAGE_SIZE << get_order(size);
-		if (size != aligned_size)
-			size = aligned_size;
-		else
-			size >>= 1;
+		size >>= 1;
+		size = PAGE_SIZE << get_order(size);
 	}
 	if (! dmab->area)
 		return -ENOMEM;

From 09b56641dddc92088bc2dd8d61ec229ce4bab8b0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 17:11:38 +0200
Subject: [PATCH 344/519] ALSA: vxpocket: Fix invalid endian conversions

commit 3acd3e3bab95ec3622ff98da313290ee823a0f68 upstream.

The endian conversions used in vxp_dma_read() and vxp_dma_write() are
superfluous and even wrong on big-endian machines, as inw() and outw()
already do conversions.  Kill them.

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pcmcia/vx/vxp_ops.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/pcmcia/vx/vxp_ops.c b/sound/pcmcia/vx/vxp_ops.c
index 56aa1ba73ccc..49a883341eff 100644
--- a/sound/pcmcia/vx/vxp_ops.c
+++ b/sound/pcmcia/vx/vxp_ops.c
@@ -375,7 +375,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 1; /* in 16bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--) {
-			outw(cpu_to_le16(*addr), port);
+			outw(*addr, port);
 			addr++;
 		}
 		addr = (unsigned short *)runtime->dma_area;
@@ -385,7 +385,7 @@ static void vxp_dma_write(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 1; /* in 16bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 0; count--) {
-		outw(cpu_to_le16(*addr), port);
+		outw(*addr, port);
 		addr++;
 	}
 	vx_release_pseudo_dma(chip);
@@ -417,7 +417,7 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 		length >>= 1; /* in 16bit words */
 		/* Transfer using pseudo-dma. */
 		for (; length > 0; length--)
-			*addr++ = le16_to_cpu(inw(port));
+			*addr++ = inw(port);
 		addr = (unsigned short *)runtime->dma_area;
 		pipe->hw_ptr = 0;
 	}
@@ -425,12 +425,12 @@ static void vxp_dma_read(struct vx_core *chip, struct snd_pcm_runtime *runtime,
 	count >>= 1; /* in 16bit words */
 	/* Transfer using pseudo-dma. */
 	for (; count > 1; count--)
-		*addr++ = le16_to_cpu(inw(port));
+		*addr++ = inw(port);
 	/* Disable DMA */
 	pchip->regDIALOG &= ~VXP_DLG_DMAREAD_SEL_MASK;
 	vx_outb(chip, DIALOG, pchip->regDIALOG);
 	/* Read the last word (16 bits) */
-	*addr = le16_to_cpu(inw(port));
+	*addr = inw(port);
 	/* Disable 16-bit accesses */
 	pchip->regDIALOG &= ~VXP_DLG_DMA16_SEL_MASK;
 	vx_outb(chip, DIALOG, pchip->regDIALOG);

From a8587cb96a79f90514b2c538498ed57541eb68ab Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Sun, 24 Jun 2018 00:32:11 +0200
Subject: [PATCH 345/519] USB: serial: sierra: fix potential deadlock at close

commit e60870012e5a35b1506d7b376fddfb30e9da0b27 upstream.

The portdata spinlock can be taken in interrupt context (via
sierra_outdat_callback()).
Disable interrupts when taking the portdata spinlock when discarding
deferred URBs during close to prevent a possible deadlock.

Fixes: 014333f77c0b ("USB: sierra: fix urb and memory leak on disconnect")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[ johan: amend commit message and add fixes and stable tags ]
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/sierra.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 07d1ecd564f7..8960a46c83bb 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -790,9 +790,9 @@ static void sierra_close(struct usb_serial_port *port)
 		kfree(urb->transfer_buffer);
 		usb_free_urb(urb);
 		usb_autopm_put_interface_async(serial->interface);
-		spin_lock(&portdata->lock);
+		spin_lock_irq(&portdata->lock);
 		portdata->outstanding_urbs--;
-		spin_unlock(&portdata->lock);
+		spin_unlock_irq(&portdata->lock);
 	}
 
 	sierra_stop_rx_urbs(port);

From 39cd328e84cbafa28a068e9a82e05dfb8b5c7512 Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Tue, 24 Jul 2018 01:34:01 +0200
Subject: [PATCH 346/519] USB: option: add support for DW5821e

commit 7bab01ecc6c43da882333c6db39741cb43677004 upstream.

The device exposes AT, NMEA and DIAG ports in both USB configurations.

The patch explicitly ignores interfaces 0 and 1, as they're bound to
other drivers already; and also interface 6, which is a GNSS interface
for which we don't have a driver yet.

T:  Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 18 Spd=480 MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  2
P:  Vendor=413c ProdID=81d7 Rev=03.18
S:  Manufacturer=DELL
S:  Product=DW5821e Snapdragon X20 LTE
S:  SerialNumber=0123456789ABCDEF
C:  #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim
I:  If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)

T:  Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  2
P:  Vendor=413c ProdID=81d7 Rev=03.18
S:  Manufacturer=DELL
S:  Product=DW5821e Snapdragon X20 LTE
S:  SerialNumber=0123456789ABCDEF
C:  #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
I:  If#= 1 Alt= 0 #EPs= 1 Cls=03(HID  ) Sub=00 Prot=00 Driver=usbhid
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option

Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index d982c455e18e..2b81939fecd7 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -199,6 +199,8 @@ static void option_instat_callback(struct urb *urb);
 #define DELL_PRODUCT_5800_V2_MINICARD_VZW	0x8196  /* Novatel E362 */
 #define DELL_PRODUCT_5804_MINICARD_ATT		0x819b  /* Novatel E371 */
 
+#define DELL_PRODUCT_5821E			0x81d7
+
 #define KYOCERA_VENDOR_ID			0x0c88
 #define KYOCERA_PRODUCT_KPC650			0x17da
 #define KYOCERA_PRODUCT_KPC680			0x180a
@@ -1033,6 +1035,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E),
+	  .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
 	{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) },	/* ADU-E100, ADU-310 */
 	{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
 	{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },

From 3f9ca472b2a7fc0e78b714fe1687c82206b20a8e Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Mon, 16 Jan 2017 10:55:45 +0800
Subject: [PATCH 347/519] ACPI: save NVS memory for Lenovo G50-45

commit cbc00c1310d34139a63946482b40a6b261a03fb9 upstream.

In commit 821d6f0359b0 (ACPI / sleep: Do not save NVS for new machines to
accelerate S3), to optimize S3 suspend/resume speed, code is introduced
to ignore NVS memory saving during S3 for all the platforms later than
2012.

But, Lenovo G50-45, a platform released in 2015, still needs NVS memory
saving during S3. A quirk is introduced for this platform.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=189431
Tested-by: Przemek <soprwa@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
[ rjw: Drop unnecessary code ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/sleep.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index e3322adaaae0..84ddae30fc34 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -124,6 +124,12 @@ void __init acpi_nvs_nosave_s3(void)
 	nvs_nosave_s3 = true;
 }
 
+static int __init init_nvs_save_s3(const struct dmi_system_id *d)
+{
+	nvs_nosave_s3 = false;
+	return 0;
+}
+
 /*
  * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
  * user to request that behavior by using the 'acpi_old_suspend_ordering'
@@ -318,6 +324,19 @@ static struct dmi_system_id acpisleep_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"),
 		},
 	},
+	/*
+	 * https://bugzilla.kernel.org/show_bug.cgi?id=189431
+	 * Lenovo G50-45 is a platform later than 2012, but needs nvs memory
+	 * saving during S3.
+	 */
+	{
+	.callback = init_nvs_save_s3,
+	.ident = "Lenovo G50-45",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "80E3"),
+		},
+	},
 	{},
 };
 

From c2650d43a4dae0a6e678af42d8056a484784f29c Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Mon, 9 Jul 2018 14:03:55 +0200
Subject: [PATCH 348/519] ACPI / PM: save NVS memory for ASUS 1025C laptop

commit 231f9415001138a000cd0f881c46654b7ea3f8c5 upstream.

Every time I tried to upgrade my laptop from 3.10.x to 4.x I faced an
issue by which the fan would run at full speed upon resume. Bisecting
it showed me the issue was introduced in 3.17 by commit 821d6f0359b0
(ACPI / sleep: Do not save NVS for new machines to accelerate S3). This
code only affects machines built starting as of 2012, but this Asus
1025C laptop was made in 2012 and apparently needs the NVS data to be
saved, otherwise the CPU's thermal state is not properly reported on
resume and the fan runs at full speed upon resume.

Here's a very simple way to check if such a machine is affected :

  # cat /sys/class/thermal/thermal_zone0/temp
  55000

  ( now suspend, wait one second and resume )

  # cat /sys/class/thermal/thermal_zone0/temp
  0

  (and after ~15 seconds the fan starts to spin)

Let's apply the same quirk as commit cbc00c13 (ACPI: save NVS memory
for Lenovo G50-45) and reuse the function it provides. Note that this
commit was already backported to 4.9.x but not 4.4.x.

Cc: 3.17+ <stable@vger.kernel.org> # 3.17+: requires cbc00c13
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/sleep.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 84ddae30fc34..4f07029de209 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -324,6 +324,14 @@ static struct dmi_system_id acpisleep_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"),
 		},
 	},
+	{
+	.callback = init_nvs_save_s3,
+	.ident = "Asus 1025C",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "1025C"),
+		},
+	},
 	/*
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=189431
 	 * Lenovo G50-45 is a platform later than 2012, but needs nvs memory

From b4d2c57717fd0c905a4b8ebd5c534e770aeb93a2 Mon Sep 17 00:00:00 2001
From: Chen Hu <hu1.chen@intel.com>
Date: Fri, 27 Jul 2018 18:32:41 +0800
Subject: [PATCH 349/519] serial: 8250_dw: always set baud rate in
 dw8250_set_termios

commit dfcab6ba573445c703235ab6c83758eec12d7f28 upstream.

dw8250_set_termios() doesn't set baud rate if the arg "old ktermios" is
NULL. This happens during resume.
Call Trace:
...
[   54.928108] dw8250_set_termios+0x162/0x170
[   54.928114] serial8250_set_termios+0x17/0x20
[   54.928117] uart_change_speed+0x64/0x160
[   54.928119] uart_resume_port
...

So the baud rate is not restored after S3 and breaks the apps who use
UART, for example, console and bluetooth etc.

We address this issue by setting the baud rate irrespective of arg
"old", just like the drivers for other 8250 IPs. This is tested with
Intel Broxton platform.

Signed-off-by: Chen Hu <hu1.chen@intel.com>
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 8435c3f204c1..a30d68c4b689 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -224,7 +224,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 	unsigned int rate;
 	int ret;
 
-	if (IS_ERR(d->clk) || !old)
+	if (IS_ERR(d->clk))
 		goto out;
 
 	clk_disable_unprepare(d->clk);

From 5069ddd8f9ace901f385fd42a75246e7a3727bc5 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 Jul 2017 16:10:06 -0500
Subject: [PATCH 350/519] x86/mm: Simplify p[g4um]d_page() macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit fd7e315988b784509ba3f1b42f539bd0b1fca9bb upstream.

Create a pgd_pfn() macro similar to the p[4um]d_pfn() macros and then
use the p[g4um]d_pfn() macros in the p[g4um]d_page() macros instead of
duplicating the code.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/e61eb533a6d0aac941db2723d8aa63ef6b882dee.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[Backported to 4.9 stable by AK, suggested by Michael Hocko]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pgtable.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 4de6c282c02a..68a55273ce0f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -173,6 +173,11 @@ static inline unsigned long pud_pfn(pud_t pud)
 	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+	return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
@@ -578,8 +583,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pmd_page(pmd)		\
-	pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
+#define pmd_page(pmd)	pfn_to_page(pmd_pfn(pmd))
 
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -647,8 +651,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pud_page(pud)		\
-	pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
+#define pud_page(pud)	pfn_to_page(pud_pfn(pud))
 
 /* Find an entry in the second-level page table.. */
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -688,7 +691,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pgd_page(pgd)		pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
 static inline unsigned long pud_index(unsigned long address)

From 9aeef6b667f2ffc4c5725ea05e180e62fb307a31 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Sun, 15 Jul 2018 20:36:50 +0100
Subject: [PATCH 351/519] Bluetooth: avoid killing an already killed socket

commit 4e1a720d0312fd510699032c7694a362a010170f upstream.

slub debug reported:

[  440.648642] =============================================================================
[  440.648649] BUG kmalloc-1024 (Tainted: G    BU     O   ): Poison overwritten
[  440.648651] -----------------------------------------------------------------------------

[  440.648655] INFO: 0xe70f4bec-0xe70f4bec. First byte 0x6a instead of 0x6b
[  440.648665] INFO: Allocated in sk_prot_alloc+0x6b/0xc6 age=33155 cpu=1 pid=1047
[  440.648671] 	___slab_alloc.constprop.24+0x1fc/0x292
[  440.648675] 	__slab_alloc.isra.18.constprop.23+0x1c/0x25
[  440.648677] 	__kmalloc+0xb6/0x17f
[  440.648680] 	sk_prot_alloc+0x6b/0xc6
[  440.648683] 	sk_alloc+0x1e/0xa1
[  440.648700] 	sco_sock_alloc.constprop.6+0x26/0xaf [bluetooth]
[  440.648716] 	sco_connect_cfm+0x166/0x281 [bluetooth]
[  440.648731] 	hci_conn_request_evt.isra.53+0x258/0x281 [bluetooth]
[  440.648746] 	hci_event_packet+0x28b/0x2326 [bluetooth]
[  440.648759] 	hci_rx_work+0x161/0x291 [bluetooth]
[  440.648764] 	process_one_work+0x163/0x2b2
[  440.648767] 	worker_thread+0x1a9/0x25c
[  440.648770] 	kthread+0xf8/0xfd
[  440.648774] 	ret_from_fork+0x2e/0x38
[  440.648779] INFO: Freed in __sk_destruct+0xd3/0xdf age=3815 cpu=1 pid=1047
[  440.648782] 	__slab_free+0x4b/0x27a
[  440.648784] 	kfree+0x12e/0x155
[  440.648787] 	__sk_destruct+0xd3/0xdf
[  440.648790] 	sk_destruct+0x27/0x29
[  440.648793] 	__sk_free+0x75/0x91
[  440.648795] 	sk_free+0x1c/0x1e
[  440.648810] 	sco_sock_kill+0x5a/0x5f [bluetooth]
[  440.648825] 	sco_conn_del+0x8e/0xba [bluetooth]
[  440.648840] 	sco_disconn_cfm+0x3a/0x41 [bluetooth]
[  440.648855] 	hci_event_packet+0x45e/0x2326 [bluetooth]
[  440.648868] 	hci_rx_work+0x161/0x291 [bluetooth]
[  440.648872] 	process_one_work+0x163/0x2b2
[  440.648875] 	worker_thread+0x1a9/0x25c
[  440.648877] 	kthread+0xf8/0xfd
[  440.648880] 	ret_from_fork+0x2e/0x38
[  440.648884] INFO: Slab 0xf4718580 objects=27 used=27 fp=0x  (null) flags=0x40008100
[  440.648886] INFO: Object 0xe70f4b88 @offset=19336 fp=0xe70f54f8

When KASAN was enabled, it reported:

[  210.096613] ==================================================================
[  210.096634] BUG: KASAN: use-after-free in ex_handler_refcount+0x5b/0x127
[  210.096641] Write of size 4 at addr ffff880107e17160 by task kworker/u9:1/2040

[  210.096651] CPU: 1 PID: 2040 Comm: kworker/u9:1 Tainted: G     U     O    4.14.47-20180606+ #2
[  210.096654] Hardware name: , BIOS 2017.01-00087-g43e04de 08/30/2017
[  210.096693] Workqueue: hci0 hci_rx_work [bluetooth]
[  210.096698] Call Trace:
[  210.096711]  dump_stack+0x46/0x59
[  210.096722]  print_address_description+0x6b/0x23b
[  210.096729]  ? ex_handler_refcount+0x5b/0x127
[  210.096736]  kasan_report+0x220/0x246
[  210.096744]  ex_handler_refcount+0x5b/0x127
[  210.096751]  ? ex_handler_clear_fs+0x85/0x85
[  210.096757]  fixup_exception+0x8c/0x96
[  210.096766]  do_trap+0x66/0x2c1
[  210.096773]  do_error_trap+0x152/0x180
[  210.096781]  ? fixup_bug+0x78/0x78
[  210.096817]  ? hci_debugfs_create_conn+0x244/0x26a [bluetooth]
[  210.096824]  ? __schedule+0x113b/0x1453
[  210.096830]  ? sysctl_net_exit+0xe/0xe
[  210.096837]  ? __wake_up_common+0x343/0x343
[  210.096843]  ? insert_work+0x107/0x163
[  210.096850]  invalid_op+0x1b/0x40
[  210.096888] RIP: 0010:hci_debugfs_create_conn+0x244/0x26a [bluetooth]
[  210.096892] RSP: 0018:ffff880094a0f970 EFLAGS: 00010296
[  210.096898] RAX: 0000000000000000 RBX: ffff880107e170e8 RCX: ffff880107e17160
[  210.096902] RDX: 000000000000002f RSI: ffff88013b80ed40 RDI: ffffffffa058b940
[  210.096906] RBP: ffff88011b2b0578 R08: 00000000852f0ec9 R09: ffffffff81cfcf9b
[  210.096909] R10: 00000000d21bdad7 R11: 0000000000000001 R12: ffff8800967b0488
[  210.096913] R13: ffff880107e17168 R14: 0000000000000068 R15: ffff8800949c0008
[  210.096920]  ? __sk_destruct+0x2c6/0x2d4
[  210.096959]  hci_event_packet+0xff5/0x7de2 [bluetooth]
[  210.096969]  ? __local_bh_enable_ip+0x43/0x5b
[  210.097004]  ? l2cap_sock_recv_cb+0x158/0x166 [bluetooth]
[  210.097039]  ? hci_le_meta_evt+0x2bb3/0x2bb3 [bluetooth]
[  210.097075]  ? l2cap_ertm_init+0x94e/0x94e [bluetooth]
[  210.097093]  ? xhci_urb_enqueue+0xbd8/0xcf5 [xhci_hcd]
[  210.097102]  ? __accumulate_pelt_segments+0x24/0x33
[  210.097109]  ? __accumulate_pelt_segments+0x24/0x33
[  210.097115]  ? __update_load_avg_se.isra.2+0x217/0x3a4
[  210.097122]  ? set_next_entity+0x7c3/0x12cd
[  210.097128]  ? pick_next_entity+0x25e/0x26c
[  210.097135]  ? pick_next_task_fair+0x2ca/0xc1a
[  210.097141]  ? switch_mm_irqs_off+0x346/0xb4f
[  210.097147]  ? __switch_to+0x769/0xbc4
[  210.097153]  ? compat_start_thread+0x66/0x66
[  210.097188]  ? hci_conn_check_link_mode+0x1cd/0x1cd [bluetooth]
[  210.097195]  ? finish_task_switch+0x392/0x431
[  210.097228]  ? hci_rx_work+0x154/0x487 [bluetooth]
[  210.097260]  hci_rx_work+0x154/0x487 [bluetooth]
[  210.097269]  process_one_work+0x579/0x9e9
[  210.097277]  worker_thread+0x68f/0x804
[  210.097285]  kthread+0x31c/0x32b
[  210.097292]  ? rescuer_thread+0x70c/0x70c
[  210.097299]  ? kthread_create_on_node+0xa3/0xa3
[  210.097306]  ret_from_fork+0x35/0x40

[  210.097314] Allocated by task 2040:
[  210.097323]  kasan_kmalloc.part.1+0x51/0xc7
[  210.097328]  __kmalloc+0x17f/0x1b6
[  210.097335]  sk_prot_alloc+0xf2/0x1a3
[  210.097340]  sk_alloc+0x22/0x297
[  210.097375]  sco_sock_alloc.constprop.7+0x23/0x202 [bluetooth]
[  210.097410]  sco_connect_cfm+0x2d0/0x566 [bluetooth]
[  210.097443]  hci_conn_request_evt.isra.53+0x6d3/0x762 [bluetooth]
[  210.097476]  hci_event_packet+0x85e/0x7de2 [bluetooth]
[  210.097507]  hci_rx_work+0x154/0x487 [bluetooth]
[  210.097512]  process_one_work+0x579/0x9e9
[  210.097517]  worker_thread+0x68f/0x804
[  210.097523]  kthread+0x31c/0x32b
[  210.097529]  ret_from_fork+0x35/0x40

[  210.097533] Freed by task 2040:
[  210.097539]  kasan_slab_free+0xb3/0x15e
[  210.097544]  kfree+0x103/0x1a9
[  210.097549]  __sk_destruct+0x2c6/0x2d4
[  210.097584]  sco_conn_del.isra.1+0xba/0x10e [bluetooth]
[  210.097617]  hci_event_packet+0xff5/0x7de2 [bluetooth]
[  210.097648]  hci_rx_work+0x154/0x487 [bluetooth]
[  210.097653]  process_one_work+0x579/0x9e9
[  210.097658]  worker_thread+0x68f/0x804
[  210.097663]  kthread+0x31c/0x32b
[  210.097670]  ret_from_fork+0x35/0x40

[  210.097676] The buggy address belongs to the object at ffff880107e170e8
 which belongs to the cache kmalloc-1024 of size 1024
[  210.097681] The buggy address is located 120 bytes inside of
 1024-byte region [ffff880107e170e8, ffff880107e174e8)
[  210.097683] The buggy address belongs to the page:
[  210.097689] page:ffffea00041f8400 count:1 mapcount:0 mapping:          (null) index:0xffff880107e15b68 compound_mapcount: 0
[  210.110194] flags: 0x8000000000008100(slab|head)
[  210.115441] raw: 8000000000008100 0000000000000000 ffff880107e15b68 0000000100170016
[  210.115448] raw: ffffea0004a47620 ffffea0004b48e20 ffff88013b80ed40 0000000000000000
[  210.115451] page dumped because: kasan: bad access detected

[  210.115454] Memory state around the buggy address:
[  210.115460]  ffff880107e17000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  210.115465]  ffff880107e17080: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb
[  210.115469] >ffff880107e17100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  210.115472]                                                        ^
[  210.115477]  ffff880107e17180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  210.115481]  ffff880107e17200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  210.115483] ==================================================================

And finally when BT_DBG() and ftrace was enabled it showed:

       <...>-14979 [001] ....   186.104191: sco_sock_kill <-sco_sock_close
       <...>-14979 [001] ....   186.104191: sco_sock_kill <-sco_sock_release
       <...>-14979 [001] ....   186.104192: sco_sock_kill: sk ef0497a0 state 9
       <...>-14979 [001] ....   186.104193: bt_sock_unlink <-sco_sock_kill
kworker/u9:2-792   [001] ....   186.104246: sco_sock_kill <-sco_conn_del
kworker/u9:2-792   [001] ....   186.104248: sco_sock_kill: sk ef0497a0 state 9
kworker/u9:2-792   [001] ....   186.104249: bt_sock_unlink <-sco_sock_kill
kworker/u9:2-792   [001] ....   186.104250: sco_sock_destruct <-__sk_destruct
kworker/u9:2-792   [001] ....   186.104250: sco_sock_destruct: sk ef0497a0
kworker/u9:2-792   [001] ....   186.104860: hci_conn_del <-hci_event_packet
kworker/u9:2-792   [001] ....   186.104864: hci_conn_del: hci0 hcon ef0484c0 handle 266

Only in the failed case, sco_sock_kill() gets called with the same sock
pointer two times. Add a check for SOCK_DEAD to avoid continue killing
a socket which has already been killed.

Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/sco.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f52bcbf2e58c..2209fd2ff2e3 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -392,7 +392,8 @@ static void sco_sock_cleanup_listen(struct sock *parent)
  */
 static void sco_sock_kill(struct sock *sk)
 {
-	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
+	    sock_flag(sk, SOCK_DEAD))
 		return;
 
 	BT_DBG("sk %p state %d", sk, sk->sk_state);

From 3b6393e30ec95bb5044cb4506ac468fb08ccf647 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 15 Aug 2018 12:14:05 -0700
Subject: [PATCH 352/519] isdn: Disable IIOCDBGVAR

[ Upstream commit 5e22002aa8809e2efab2da95855f73f63e14a36c ]

It was possible to directly leak the kernel address where the isdn_dev
structure pointer was stored. This is a kernel ASLR bypass for anyone
with access to the ioctl. The code had been present since the beginning
of git history, though this shouldn't ever be needed for normal operation,
therefore remove it.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Karsten Keil <isdn@linux-pingi.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/i4l/isdn_common.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index e4c43a17b333..8088c34336aa 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1655,13 +1655,7 @@ isdn_ioctl(struct file *file, uint cmd, ulong arg)
 			} else
 				return -EINVAL;
 		case IIOCDBGVAR:
-			if (arg) {
-				if (copy_to_user(argp, &dev, sizeof(ulong)))
-					return -EFAULT;
-				return 0;
-			} else
-				return -EINVAL;
-			break;
+			return -EINVAL;
 		default:
 			if ((cmd & IIOCDRVCTL) == IIOCDRVCTL)
 				cmd = ((cmd >> _IOC_NRSHIFT) & _IOC_NRMASK) & ISDN_DRVIOCTL_MASK;

From 78f654f6cce3442937b8c7eb4b640357871363c1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 22 Aug 2018 07:48:38 +0200
Subject: [PATCH 353/519] Linux 4.4.151

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 7789195c6a59..04199cf99dd5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 150
+SUBLEVEL = 151
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 97aaf73394cce0cf85c393c2a5e1d9ad6ddb2245 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Fri, 1 Jun 2018 14:34:33 +0300
Subject: [PATCH 354/519] ARC: Explicitly add -mmedium-calls to CFLAGS

[ Upstream commit 74c11e300c103af47db5b658fdcf28002421e250 ]

GCC built for arc*-*-linux has "-mmedium-calls" implicitly enabled by default
thus we don't see any problems during Linux kernel compilation.
----------------------------->8------------------------
arc-linux-gcc -mcpu=arc700 -Q --help=target | grep calls
  -mlong-calls                          [disabled]
  -mmedium-calls                        [enabled]
----------------------------->8------------------------

But if we try to use so-called Elf32 toolchain with GCC configured for
arc*-*-elf* then we'd see the following failure:
----------------------------->8------------------------
init/do_mounts.o: In function 'init_rootfs':
do_mounts.c:(.init.text+0x108): relocation truncated to fit: R_ARC_S21W_PCREL
against symbol 'unregister_filesystem' defined in .text section in fs/filesystems.o

arc-elf32-ld: final link failed: Symbol needs debug section which does not exist
make: *** [vmlinux] Error 1
----------------------------->8------------------------

That happens because neither "-mmedium-calls" nor "-mlong-calls" are enabled in
Elf32 GCC:
----------------------------->8------------------------
arc-elf32-gcc -mcpu=arc700 -Q --help=target | grep calls
  -mlong-calls                          [disabled]
  -mmedium-calls                        [disabled]
----------------------------->8------------------------

Now to make it possible to use Elf32 toolchain for building Linux kernel
we're explicitly add "-mmedium-calls" to CFLAGS.

And since we add "-mmedium-calls" to the global CFLAGS there's no point in
having per-file copies thus removing them.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/Makefile | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index c05ea2b54276..b9f7306412e5 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -14,7 +14,7 @@ endif
 
 KBUILD_DEFCONFIG := nsim_700_defconfig
 
-cflags-y	+= -fno-common -pipe -fno-builtin -D__linux__
+cflags-y	+= -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
 cflags-$(CONFIG_ISA_ARCOMPACT)	+= -mA7
 cflags-$(CONFIG_ISA_ARCV2)	+= -mcpu=archs
 
@@ -137,16 +137,3 @@ dtbs: scripts
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
-
-# Hacks to enable final link due to absence of link-time branch relexation
-# and gcc choosing optimal(shorter) branches at -O3
-#
-# vineetg Feb 2010: -mlong-calls switched off for overall kernel build
-# However lib/decompress_inflate.o (.init.text) calls
-# zlib_inflate_workspacesize (.text) causing relocation errors.
-# Thus forcing all exten calls in this file to be long calls
-export CFLAGS_decompress_inflate.o = -mmedium-calls
-export CFLAGS_initramfs.o = -mmedium-calls
-ifdef CONFIG_SMP
-export CFLAGS_core.o = -mmedium-calls
-endif

From 8747d9e7d45420350524dc8a8343837b8ac92776 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 13 Jun 2018 10:11:56 -0700
Subject: [PATCH 355/519] netfilter: ipv6: nf_defrag: reduce struct net memory
 waste

[ Upstream commit 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 ]

It is a waste of memory to use a full "struct netns_sysctl_ipv6"
while only one pointer is really used, considering netns_sysctl_ipv6
keeps growing.

Also, since "struct netns_frags" has cache line alignment,
it is better to move the frags_hdr pointer outside, otherwise
we spend a full cache line for this pointer.

This saves 192 bytes of memory per netns.

Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/net_namespace.h             | 1 +
 include/net/netns/ipv6.h                | 1 -
 net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++---
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 93328c61934a..6965dfe7e88b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -115,6 +115,7 @@ struct net {
 #endif
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 	struct netns_nf_frag	nf_frag;
+	struct ctl_table_header *nf_frag_frags_hdr;
 #endif
 	struct sock		*nfnl;
 	struct sock		*nfnl_stash;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c0368db6df54..d235722c0d92 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -86,7 +86,6 @@ struct netns_ipv6 {
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 struct netns_nf_frag {
-	struct netns_sysctl_ipv6 sysctl;
 	struct netns_frags	frags;
 };
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index eb2dc39f7066..838b65a59a73 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -118,7 +118,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 	if (hdr == NULL)
 		goto err_reg;
 
-	net->nf_frag.sysctl.frags_hdr = hdr;
+	net->nf_frag_frags_hdr = hdr;
 	return 0;
 
 err_reg:
@@ -132,8 +132,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
 
-	table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
-	unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+	table = net->nf_frag_frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->nf_frag_frags_hdr);
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }

From c22a9ad9d0dfa4a72aeb08bc5275239d4281f077 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 16:46:03 -0600
Subject: [PATCH 356/519] selftests: pstore: return Kselftest Skip code for
 skipped tests

[ Upstream commit 856e7c4b619af622d56b3b454f7bec32a170ac99 ]

When pstore_post_reboot test gets skipped because of unmet dependencies
and/or unsupported configuration, it returns 0 which is treated as a pass
by the Kselftest framework. This leads to false positive result even when
the test could not be run.

Change it to return kselftest skip code when a test gets skipped to clearly
report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/pstore/pstore_post_reboot_tests | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
index 6ccb154cb4aa..22f8df1ad7d4 100755
--- a/tools/testing/selftests/pstore/pstore_post_reboot_tests
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -7,13 +7,16 @@
 #
 # Released under the terms of the GPL v2.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./common_tests
 
 if [ -e $REBOOT_FLAG  ]; then
     rm $REBOOT_FLAG
 else
     prlog "pstore_crash_test has not been executed yet. we skip further tests."
-    exit 0
+    exit $ksft_skip
 fi
 
 prlog -n "Mounting pstore filesystem ... "

From 68f5b8ff63022b1d6b7019092c66c916a499dc84 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 17:40:31 -0600
Subject: [PATCH 357/519] selftests: static_keys: return Kselftest Skip code
 for skipped tests

[ Upstream commit 8781578087b8fb8829558bac96c3c24e5ba26f82 ]

When static_keys test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as a fail
by the Kselftest framework. This leads to false negative result even when
the test could not be run.

Change it to return kselftest skip code when a test gets skipped to clearly
report that the test could not be run.

Added an explicit searches for test_static_key_base and test_static_keys
modules and return skip code if they aren't found to differentiate between
the failure to load the module condition and module not found condition.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../selftests/static_keys/test_static_keys.sh       | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index 1261e3fa1e3a..5bba7796fb34 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -1,6 +1,19 @@
 #!/bin/sh
 # Runs static keys kernel module tests
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+	echo "static_key: module test_static_key_base is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+	echo "static_key: module test_static_keys is not found [SKIP]"
+	exit $ksft_skip
+fi
+
 if /sbin/modprobe -q test_static_key_base; then
 	if /sbin/modprobe -q test_static_keys; then
 		echo "static_key: ok"

From 01024c01171b1c5c18f1269c124886a273230db4 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Wed, 13 Jun 2018 21:10:48 -0600
Subject: [PATCH 358/519] selftests: user: return Kselftest Skip code for
 skipped tests

[ Upstream commit d7d5311d4aa9611fe1a5a851e6f75733237a668a ]

When user test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run. Add an explicit check
for module presence and return skip code if module isn't present.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/user/test_user_copy.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
index 350107f40c1d..0409270f998c 100755
--- a/tools/testing/selftests/user/test_user_copy.sh
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -1,6 +1,13 @@
 #!/bin/sh
 # Runs copy_to/from_user infrastructure using test_user_copy kernel module
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+	echo "user: module test_user_copy is not found [SKIP]"
+	exit $ksft_skip
+fi
 if /sbin/modprobe -q test_user_copy; then
 	/sbin/modprobe -q -r test_user_copy
 	echo "user_copy: ok"

From 8e54d87ffc5170acfe72b371198ddcb685f88e63 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Thu, 14 Jun 2018 16:56:13 -0600
Subject: [PATCH 359/519] selftests: zram: return Kselftest Skip code for
 skipped tests

[ Upstream commit 685814466bf8398192cf855415a0bb2cefc1930e ]

When zram test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/zram/zram.sh     | 5 ++++-
 tools/testing/selftests/zram/zram_lib.sh | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
index 683a292e3290..9399c4aeaa26 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -1,6 +1,9 @@
 #!/bin/bash
 TCID="zram.sh"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./zram_lib.sh
 
 run_zram () {
@@ -23,5 +26,5 @@ elif [ -b /dev/zram0 ]; then
 else
 	echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
 	echo "$TCID : CONFIG_ZRAM is not set"
-	exit 1
+	exit $ksft_skip
 fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
index f6a9c73e7a44..9e73a4fb9b0a 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -18,6 +18,9 @@ MODULE=0
 dev_makeswap=-1
 dev_mounted=-1
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 trap INT
 
 check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
 
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 

From 67726a2e9cd7e16503e8d54c54b5885a8737663d Mon Sep 17 00:00:00 2001
From: Fathi Boudra <fathi.boudra@linaro.org>
Date: Thu, 14 Jun 2018 11:57:08 +0200
Subject: [PATCH 360/519] selftests: sync: add config fragment for testing sync
 framework

[ Upstream commit d6a3e55131fcb1e5ca1753f4b6f297a177b2fc91 ]

Unless the software synchronization objects (CONFIG_SW_SYNC) is enabled,
the sync test will be skipped:

TAP version 13
1..0 # Skipped: Sync framework not supported by kernel

Add a config fragment file to be able to run "make kselftest-merge" to
enable relevant configuration required in order to run the sync test.

Signed-off-by: Fathi Boudra <fathi.boudra@linaro.org>
Link: https://lkml.org/lkml/2017/5/5/14
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/sync/config | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 tools/testing/selftests/sync/config

diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000000..1ab7e8130db2
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y

From 0a6914e81cfaf4b77ba6ebeb09f87dab4ad2336c Mon Sep 17 00:00:00 2001
From: Ray Jui <ray.jui@broadcom.com>
Date: Tue, 12 Jun 2018 13:21:27 -0700
Subject: [PATCH 361/519] ARM: dts: Cygnus: Fix I2C controller interrupt type

[ Upstream commit 71ca3409703b62b6a092d0d9d13f366c121bc5d3 ]

Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom
Cygnus SoC.

Fixes: b51c05a331ff ("ARM: dts: add I2C device nodes for Broadcom Cygnus")
Signed-off-by: Ray Jui <ray.jui@broadcom.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/bcm-cygnus.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi
index 2778533502d9..5ce200860c89 100644
--- a/arch/arm/boot/dts/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
@@ -110,7 +110,7 @@ i2c0: i2c@18008000 {
 			reg = <0x18008000 0x100>;
 			#address-cells = <1>;
 			#size-cells = <0>;
-			interrupts = <GIC_SPI 85 IRQ_TYPE_NONE>;
+			interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
 			clock-frequency = <100000>;
 			status = "disabled";
 		};
@@ -138,7 +138,7 @@ i2c1: i2c@1800b000 {
 			reg = <0x1800b000 0x100>;
 			#address-cells = <1>;
 			#size-cells = <0>;
-			interrupts = <GIC_SPI 86 IRQ_TYPE_NONE>;
+			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
 			clock-frequency = <100000>;
 			status = "disabled";
 		};

From 879384b1fc452c7de5f3c027c7530f0379a8a3a2 Mon Sep 17 00:00:00 2001
From: William Wu <william.wu@rock-chips.com>
Date: Fri, 11 May 2018 17:46:32 +0800
Subject: [PATCH 362/519] usb: dwc2: fix isoc split in transfer with no data

[ Upstream commit 70c3c8cb83856758025c2a211dd022bc0478922a ]

If isoc split in transfer with no data (the length of DATA0
packet is zero), we can't simply return immediately. Because
the DATA0 can be the first transaction or the second transaction
for the isoc split in transaction. If the DATA0 packet with no
data is in the first transaction, we can return immediately.
But if the DATA0 packet with no data is in the second transaction
of isoc split in transaction sequence, we need to increase the
qtd->isoc_frame_index and giveback urb to device driver if needed,
otherwise, the MDATA packet will be lost.

A typical test case is that connect the dwc2 controller with an
usb hs Hub (GL852G-12), and plug an usb fs audio device (Plantronics
headset) into the downstream port of Hub. Then use the usb mic
to record, we can find noise when playback.

In the case, the isoc split in transaction sequence like this:

- SSPLIT IN transaction
- CSPLIT IN transaction
  - MDATA packet (176 bytes)
- CSPLIT IN transaction
  - DATA0 packet (0 byte)

This patch use both the length of DATA0 and qtd->isoc_split_offset
to check if the DATA0 is in the second transaction.

Tested-by: Gevorg Sahakyan <sahakyan@synopsys.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Minas Harutyunyan hminas@synopsys.com>
Signed-off-by: William Wu <william.wu@rock-chips.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/hcd_intr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index bda0b21b850f..51866f3f2052 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -931,9 +931,8 @@ static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg,
 	frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index];
 	len = dwc2_get_actual_xfer_length(hsotg, chan, chnum, qtd,
 					  DWC2_HC_XFER_COMPLETE, NULL);
-	if (!len) {
+	if (!len && !qtd->isoc_split_offset) {
 		qtd->complete_split = 0;
-		qtd->isoc_split_offset = 0;
 		return 0;
 	}
 

From 3c29ae7ce74b4a3fa9719c4ccc244b59c0906a65 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Fri, 25 May 2018 17:24:57 +0800
Subject: [PATCH 363/519] usb: gadget: composite: fix delayed_status race
 condition when set_interface

[ Upstream commit 980900d6318066b9f8314bfb87329a20fd0d1ca4 ]

It happens when enable debug log, if set_alt() returns
USB_GADGET_DELAYED_STATUS and usb_composite_setup_continue()
is called before increasing count of @delayed_status,
so fix it by using spinlock of @cdev->lock.

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Tested-by: Jay Hsu <shih-chieh.hsu@mediatek.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/composite.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index eb445c2ab15e..58f5fbdb6959 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1619,6 +1619,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 		 */
 		if (w_value && !f->get_alt)
 			break;
+
+		spin_lock(&cdev->lock);
 		value = f->set_alt(f, w_index, w_value);
 		if (value == USB_GADGET_DELAYED_STATUS) {
 			DBG(cdev,
@@ -1628,6 +1630,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 			DBG(cdev, "delayed_status count %d\n",
 					cdev->delayed_status);
 		}
+		spin_unlock(&cdev->lock);
 		break;
 	case USB_REQ_GET_INTERFACE:
 		if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE))

From d51ce9e03e6341a036b44e38d91adaf48c82ec9b Mon Sep 17 00:00:00 2001
From: Grigor Tovmasyan <Grigor.Tovmasyan@synopsys.com>
Date: Thu, 24 May 2018 18:22:30 +0400
Subject: [PATCH 364/519] usb: gadget: dwc2: fix memory leak in gadget_init()

[ Upstream commit 9bb073a053f0464ea74a4d4c331fdb7da58568d6 ]

Freed allocated request for ep0 to prevent memory leak in case when
dwc2_driver_probe() failed.

Cc: Stefan Wahren <stefan.wahren@i2se.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Minas Harutyunyan <hminas@synopsys.com>
Signed-off-by: Grigor Tovmasyan <tovmasya@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/gadget.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 98705b83d2dc..842c1ae7a291 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -3657,9 +3657,11 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq)
 	}
 
 	ret = usb_add_gadget_udc(dev, &hsotg->gadget);
-	if (ret)
+	if (ret) {
+		dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep,
+					   hsotg->ctrl_req);
 		return ret;
-
+	}
 	dwc2_hsotg_dump(hsotg);
 
 	return 0;
@@ -3672,6 +3674,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq)
 int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg)
 {
 	usb_del_gadget_udc(&hsotg->gadget);
+	dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, hsotg->ctrl_req);
 
 	return 0;
 }

From 7d082f109ec260ad0f13b9e77bd41b38d0c34326 Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Sat, 16 Jun 2018 01:05:01 +0800
Subject: [PATCH 365/519] scsi: xen-scsifront: add error handling for
 xenbus_printf

[ Upstream commit 93efbd39870474cc536b9caf4a6efeb03b0bc56f ]

When xenbus_printf fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling xenbus_printf.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/xen-scsifront.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 9dc8687bf048..e1b32ed0aa20 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -676,10 +676,17 @@ static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
 static int scsifront_sdev_configure(struct scsi_device *sdev)
 {
 	struct vscsifrnt_info *info = shost_priv(sdev->host);
+	int err;
 
-	if (info && current == info->curr)
-		xenbus_printf(XBT_NIL, info->dev->nodename,
+	if (info && current == info->curr) {
+		err = xenbus_printf(XBT_NIL, info->dev->nodename,
 			      info->dev_state_path, "%d", XenbusStateConnected);
+		if (err) {
+			xenbus_dev_error(info->dev, err,
+				"%s: writing dev_state_path", __func__);
+			return err;
+		}
+	}
 
 	return 0;
 }
@@ -687,10 +694,15 @@ static int scsifront_sdev_configure(struct scsi_device *sdev)
 static void scsifront_sdev_destroy(struct scsi_device *sdev)
 {
 	struct vscsifrnt_info *info = shost_priv(sdev->host);
+	int err;
 
-	if (info && current == info->curr)
-		xenbus_printf(XBT_NIL, info->dev->nodename,
+	if (info && current == info->curr) {
+		err = xenbus_printf(XBT_NIL, info->dev->nodename,
 			      info->dev_state_path, "%d", XenbusStateClosed);
+		if (err)
+			xenbus_dev_error(info->dev, err,
+				"%s: writing dev_state_path", __func__);
+	}
 }
 
 static struct scsi_host_template scsifront_sht = {
@@ -1025,9 +1037,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
 
 			if (scsi_add_device(info->host, chn, tgt, lun)) {
 				dev_err(&dev->dev, "scsi_add_device\n");
-				xenbus_printf(XBT_NIL, dev->nodename,
+				err = xenbus_printf(XBT_NIL, dev->nodename,
 					      info->dev_state_path,
 					      "%d", XenbusStateClosed);
+				if (err)
+					xenbus_dev_error(dev, err,
+						"%s: writing dev_state_path", __func__);
 			}
 			break;
 		case VSCSIFRONT_OP_DEL_LUN:
@@ -1041,10 +1056,14 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
 			}
 			break;
 		case VSCSIFRONT_OP_READD_LUN:
-			if (device_state == XenbusStateConnected)
-				xenbus_printf(XBT_NIL, dev->nodename,
+			if (device_state == XenbusStateConnected) {
+				err = xenbus_printf(XBT_NIL, dev->nodename,
 					      info->dev_state_path,
 					      "%d", XenbusStateConnected);
+				if (err)
+					xenbus_dev_error(dev, err,
+						"%s: writing dev_state_path", __func__);
+			}
 			break;
 		default:
 			break;

From d5a362036c85990648c726716f4a569a504b89fb Mon Sep 17 00:00:00 2001
From: Zhizhou Zhang <zhizhouzhang@asrmicro.com>
Date: Tue, 12 Jun 2018 17:07:37 +0800
Subject: [PATCH 366/519] arm64: make secondary_start_kernel() notrace

[ Upstream commit b154886f7892499d0d3054026e19dfb9a731df61 ]

We can't call function trace hook before setup percpu offset.
When entering secondary_start_kernel(), percpu offset has not
been initialized.  So this lead hotplug malfunction.
Here is the flow to reproduce this bug:

echo 0 > /sys/devices/system/cpu/cpu1/online
echo function > /sys/kernel/debug/tracing/current_tracer
echo 1 > /sys/kernel/debug/tracing/tracing_on
echo 1 > /sys/devices/system/cpu/cpu1/online

Acked-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Zhizhou Zhang <zhizhouzhang@asrmicro.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f3c3d8fee5ba..03c0946b79d2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -131,7 +131,7 @@ static void smp_store_cpu_info(unsigned int cpuid)
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
  */
-asmlinkage void secondary_start_kernel(void)
+asmlinkage notrace void secondary_start_kernel(void)
 {
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu = smp_processor_id();

From f0e199f330ea7d059713be165edcf86a3f725b3e Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Mon, 18 Jun 2018 21:58:01 -0700
Subject: [PATCH 367/519] qed: Add sanity check for SIMD fastpath handler.

[ Upstream commit 3935a70968820c3994db4de7e6e1c7e814bff875 ]

Avoid calling a SIMD fastpath handler if it is NULL. The check is needed
to handle an unlikely scenario where unsolicited interrupt is destined to
a PF in INTa mode.

Fixes: fe56b9e6a ("qed: Add module with basic common support")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 688b6da5a9bb..35e1468d8196 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -461,8 +461,16 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance)
 		/* Fastpath interrupts */
 		for (j = 0; j < 64; j++) {
 			if ((0x2ULL << j) & status) {
-				hwfn->simd_proto_handler[j].func(
-					hwfn->simd_proto_handler[j].token);
+				struct qed_simd_fp_handler *p_handler =
+					&hwfn->simd_proto_handler[j];
+
+				if (p_handler->func)
+					p_handler->func(p_handler->token);
+				else
+					DP_NOTICE(hwfn,
+						  "Not calling fastpath handler as it is NULL [handler #%d, status 0x%llx]\n",
+						  j, status);
+
 				status &= ~(0x2ULL << j);
 				rc = IRQ_HANDLED;
 			}

From d89657e052abc1ef49acd1bab3d66bfe3efd25e1 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Tue, 19 Jun 2018 08:15:24 -0700
Subject: [PATCH 368/519] enic: initialize enic->rfs_h.lock in enic_probe

[ Upstream commit 3256d29fc7aecdf99feb1cb9475ed2252769a8a7 ]

lockdep spotted that we are using rfs_h.lock in enic_get_rxnfc() without
initializing. rfs_h.lock is initialized in enic_open(). But ethtool_ops
can be called when interface is down.

Move enic_rfs_flw_tbl_init to enic_probe.

INFO: trying to register non-static key.
the code is fine but needs lockdep annotation.
turning off the locking correctness validator.
CPU: 18 PID: 1189 Comm: ethtool Not tainted 4.17.0-rc7-devel+ #27
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
Call Trace:
dump_stack+0x85/0xc0
register_lock_class+0x550/0x560
? __handle_mm_fault+0xa8b/0x1100
__lock_acquire+0x81/0x670
lock_acquire+0xb9/0x1e0
?  enic_get_rxnfc+0x139/0x2b0 [enic]
_raw_spin_lock_bh+0x38/0x80
? enic_get_rxnfc+0x139/0x2b0 [enic]
enic_get_rxnfc+0x139/0x2b0 [enic]
ethtool_get_rxnfc+0x8d/0x1c0
dev_ethtool+0x16c8/0x2400
? __mutex_lock+0x64d/0xa00
? dev_load+0x6a/0x150
dev_ioctl+0x253/0x4b0
sock_do_ioctl+0x9a/0x130
sock_ioctl+0x1af/0x350
do_vfs_ioctl+0x8e/0x670
? syscall_trace_enter+0x1e2/0x380
ksys_ioctl+0x60/0x90
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5a/0x170
entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cisco/enic/enic_clsf.c | 3 +--
 drivers/net/ethernet/cisco/enic/enic_main.c | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c
index 3c677ed3c29e..4d9014d5b36d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.c
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c
@@ -78,7 +78,6 @@ void enic_rfs_flw_tbl_init(struct enic *enic)
 	enic->rfs_h.max = enic->config.num_arfs;
 	enic->rfs_h.free = enic->rfs_h.max;
 	enic->rfs_h.toclean = 0;
-	enic_rfs_timer_start(enic);
 }
 
 void enic_rfs_flw_tbl_free(struct enic *enic)
@@ -87,7 +86,6 @@ void enic_rfs_flw_tbl_free(struct enic *enic)
 
 	enic_rfs_timer_stop(enic);
 	spin_lock_bh(&enic->rfs_h.lock);
-	enic->rfs_h.free = 0;
 	for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) {
 		struct hlist_head *hhead;
 		struct hlist_node *tmp;
@@ -98,6 +96,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic)
 			enic_delfltr(enic, n->fltr_id);
 			hlist_del(&n->node);
 			kfree(n);
+			enic->rfs_h.free++;
 		}
 	}
 	spin_unlock_bh(&enic->rfs_h.lock);
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 029fa5bee520..8390597aecb8 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1760,7 +1760,7 @@ static int enic_open(struct net_device *netdev)
 		vnic_intr_unmask(&enic->intr[i]);
 
 	enic_notify_timer_start(enic);
-	enic_rfs_flw_tbl_init(enic);
+	enic_rfs_timer_start(enic);
 
 	return 0;
 
@@ -2694,6 +2694,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	enic->notify_timer.function = enic_notify_timer;
 	enic->notify_timer.data = (unsigned long)enic;
 
+	enic_rfs_flw_tbl_init(enic);
 	enic_set_rx_coal_setting(enic);
 	INIT_WORK(&enic->reset, enic_reset);
 	INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset);

From e5bfd8e707375eaa67c775e69ded5b8f104a80db Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 17 Jun 2018 23:40:53 +0200
Subject: [PATCH 369/519] net: hamradio: use eth_broadcast_addr

[ Upstream commit 4e8439aa34802deab11cee68b0ecb18f887fb153 ]

The array bpq_eth_addr is only used to get the size of an
address, whereas the bcast_addr is used to set the broadcast
address. This leads to a warning when using clang:
drivers/net/hamradio/bpqether.c:94:13: warning: variable 'bpq_eth_addr' is not
      needed and will not be emitted [-Wunneeded-internal-declaration]
static char bpq_eth_addr[6];
            ^

Remove both variables and use the common eth_broadcast_addr
to set the broadcast address.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/hamradio/bpqether.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index d95a50ae996d..8748e8c9ce96 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -89,10 +89,6 @@
 static const char banner[] __initconst = KERN_INFO \
 	"AX.25: bpqether driver version 004\n";
 
-static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
-
-static char bpq_eth_addr[6];
-
 static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
 static int bpq_device_event(struct notifier_block *, unsigned long, void *);
 
@@ -515,8 +511,8 @@ static int bpq_new_device(struct net_device *edev)
 	bpq->ethdev = edev;
 	bpq->axdev = ndev;
 
-	memcpy(bpq->dest_addr, bcast_addr, sizeof(bpq_eth_addr));
-	memcpy(bpq->acpt_addr, bcast_addr, sizeof(bpq_eth_addr));
+	eth_broadcast_addr(bpq->dest_addr);
+	eth_broadcast_addr(bpq->acpt_addr);
 
 	err = register_netdevice(ndev);
 	if (err)

From c5df06baac86afb3dfd69d7b865cf9e5d13c81f4 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Tue, 19 Jun 2018 17:23:17 +0800
Subject: [PATCH 370/519] net: propagate dev_get_valid_name return code

[ Upstream commit 7892bd081045222b9e4027fec279a28d6fe7aa66 ]

if dev_get_valid_name failed, propagate its return code

and remove the setting err to ENODEV, it will be set to
0 again before dev_change_net_namespace exits.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 3bcbf931a910..191cf880d805 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7420,7 +7420,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (dev_get_valid_name(net, dev, pat) < 0)
+		err = dev_get_valid_name(net, dev, pat);
+		if (err < 0)
 			goto out;
 	}
 
@@ -7432,7 +7433,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	dev_close(dev);
 
 	/* And unlink it from device chain */
-	err = -ENODEV;
 	unlist_netdevice(dev);
 
 	synchronize_net();

From d68db3d9974fdc31664a46bbd12a98915eeec1f6 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Date: Wed, 29 Nov 2017 11:21:45 +0300
Subject: [PATCH 371/519] ARC: Enable machine_desc->init_per_cpu for
 !CONFIG_SMP

[ Upstream commit 2f24ef7413a4d91657ef04e77c27ce0b313e6c95 ]

machine_desc->init_per_cpu() hook is supposed to be per cpu
initialization and would seem to apply  equally to UP and/or SMP.
Infact the comment in header file seems to suggest it works for
UP too, which was not the case and this patch.

This enables !CONFIG_SMP build for platforms such as hsdk.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
[vgupta: trimmeed changelog]
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/mach_desc.h | 2 --
 arch/arc/kernel/irq.c            | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index c28e6c347b49..871f3cb16af9 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -34,9 +34,7 @@ struct machine_desc {
 	const char		*name;
 	const char		**dt_compat;
 	void			(*init_early)(void);
-#ifdef CONFIG_SMP
 	void			(*init_per_cpu)(unsigned int);
-#endif
 	void			(*init_machine)(void);
 	void			(*init_late)(void);
 
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index ba17f85285cf..dd42c6feaba5 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -31,10 +31,10 @@ void __init init_IRQ(void)
 	/* a SMP H/w block could do IPI IRQ request here */
 	if (plat_smp_ops.init_per_cpu)
 		plat_smp_ops.init_per_cpu(smp_processor_id());
+#endif
 
 	if (machine_desc->init_per_cpu)
 		machine_desc->init_per_cpu(smp_processor_id());
-#endif
 }
 
 /*

From 39565a7d63fb3ccf98d5745499760c1000012599 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 20 Jun 2018 10:03:56 +0200
Subject: [PATCH 372/519] net: davinci_emac: match the mdio device against its
 compatible if possible

[ Upstream commit ea0820bb771175c7d4192fc6f5b5c56b3c6d5239 ]

Device tree based systems without of_dev_auxdata will have the mdio
device named differently than "davinci_mdio(.0)". In this case use the
device's parent's compatible string for matching

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/ti/davinci_emac.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 33bd3b902304..6be315303d61 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1517,6 +1517,10 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd)
 
 static int match_first_device(struct device *dev, void *data)
 {
+	if (dev->parent && dev->parent->of_node)
+		return of_device_is_compatible(dev->parent->of_node,
+					       "ti,davinci_mdio");
+
 	return !strncmp(dev_name(dev), "davinci_mdio", 12);
 }
 

From c40dc96f7f7e29d7c1e520c46c87178c2d4b1dcc Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 4 Apr 2018 14:06:30 -0400
Subject: [PATCH 373/519] locking/lockdep: Do not record IRQ state within
 lockdep code

[ Upstream commit fcc784be837714a9173b372ff9fb9b514590dad9 ]

While debugging where things were going wrong with mapping
enabling/disabling interrupts with the lockdep state and actual real
enabling and disabling interrupts, I had to silent the IRQ
disabling/enabling in debug_check_no_locks_freed() because it was
always showing up as it was called before the splat was.

Use raw_local_irq_save/restore() for not only debug_check_no_locks_freed()
but for all internal lockdep functions, as they hide useful information
about where interrupts were used incorrectly last.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/lkml/20180404140630.3f4f4c7a@gandalf.local.home
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/locking/lockdep.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0e2c4911ba61..6e171b547a80 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1264,11 +1264,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
 	this.parent = NULL;
 	this.class = class;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	arch_spin_lock(&lockdep_lock);
 	ret = __lockdep_count_forward_deps(&this);
 	arch_spin_unlock(&lockdep_lock);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	return ret;
 }
@@ -1291,11 +1291,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
 	this.parent = NULL;
 	this.class = class;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	arch_spin_lock(&lockdep_lock);
 	ret = __lockdep_count_backward_deps(&this);
 	arch_spin_unlock(&lockdep_lock);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	return ret;
 }
@@ -4123,7 +4123,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 	if (unlikely(!debug_locks))
 		return;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	for (i = 0; i < curr->lockdep_depth; i++) {
 		hlock = curr->held_locks + i;
 
@@ -4134,7 +4134,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 		print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
 		break;
 	}
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 

From 55989af9dfa0ce11ef9694899e649fdf38b9aab2 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 21 Jun 2018 19:49:36 +0800
Subject: [PATCH 374/519] ipv6: mcast: fix unsolicited report interval after
 receiving querys

[ Upstream commit 6c6da92808442908287fae8ebb0ca041a52469f4 ]

After recieving MLD querys, we update idev->mc_maxdelay with max_delay
from query header. This make the later unsolicited reports have the same
interval with mc_maxdelay, which means we may send unsolicited reports with
long interval time instead of default configured interval time.

Also as we will not call ipv6_mc_reset() after device up. This issue will
be there even after leave the group and join other groups.

Fixes: fc4eba58b4c14 ("ipv6: make unsolicited report intervals configurable for mld")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/mcast.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 06640685ff43..091cee551cd9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2061,7 +2061,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
 		mld_send_initial_cr(idev);
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev, idev->mc_maxdelay);
+			mld_dad_start_timer(idev,
+					    unsolicited_report_interval(idev));
 	}
 }
 
@@ -2073,7 +2074,8 @@ static void mld_dad_timer_expire(unsigned long data)
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev, idev->mc_maxdelay);
+			mld_dad_start_timer(idev,
+					    unsolicited_report_interval(idev));
 	}
 	in6_dev_put(idev);
 }
@@ -2431,7 +2433,8 @@ static void mld_ifc_timer_expire(unsigned long data)
 	if (idev->mc_ifc_count) {
 		idev->mc_ifc_count--;
 		if (idev->mc_ifc_count)
-			mld_ifc_start_timer(idev, idev->mc_maxdelay);
+			mld_ifc_start_timer(idev,
+					    unsolicited_report_interval(idev));
 	}
 	in6_dev_put(idev);
 }

From 091cb7dec89b1bbd4989a30f5ae9ab93248d2a93 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Fri, 22 Jun 2018 10:54:45 -0700
Subject: [PATCH 375/519] Smack: Mark inode instant in smack_task_to_inode

[ Upstream commit 7b4e88434c4e7982fb053c49657e1c8bbb8692d9 ]

Smack: Mark inode instant in smack_task_to_inode

/proc clean-up in commit 1bbc55131e59bd099fdc568d3aa0b42634dbd188
resulted in smack_task_to_inode() being called before smack_d_instantiate.
This resulted in the smk_inode value being ignored, even while present
for files in /proc/self. Marking the inode as instant here fixes that.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/smack/smack_lsm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 735a1a9386d6..c73361859d11 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2260,6 +2260,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
 	struct smack_known *skp = smk_of_task_struct(p);
 
 	isp->smk_inode = skp;
+	isp->smk_flags |= SMK_INODE_INSTANT;
 }
 
 /*

From 86c43df5bfc34c5dd910b9257f1d5cc03d367e15 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Sat, 23 Jun 2018 20:28:26 +0530
Subject: [PATCH 376/519] cxgb4: when disabling dcb set txq dcb priority to 0

[ Upstream commit 5ce36338a30f9814fc4824f9fe6c20cd83d872c7 ]

When we are disabling DCB, store "0" in txq->dcb_prio
since that's used for future TX Work Request "OVLAN_IDX"
values. Setting non zero priority upon disabling DCB
would halt the traffic.

Reported-by: AMG Zollner Robert <robert@cloudmedia.eu>
CC: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 090e00650601..a3e1498ca67c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -338,7 +338,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable)
 				"Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n",
 				enable ? "set" : "unset", pi->port_id, i, -err);
 		else
-			txq->dcb_prio = value;
+			txq->dcb_prio = enable ? value : 0;
 	}
 }
 #endif /* CONFIG_CHELSIO_T4_DCB */

From 852f7cdbc8f78a8e740d2164aad7e6ac6e355dc3 Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Wed, 30 May 2018 11:06:34 +0200
Subject: [PATCH 377/519] brcmfmac: stop watchdog before detach and free
 everything

[ Upstream commit 373c83a801f15b1e3d02d855fad89112bd4ccbe0 ]

Using built-in in kernel image without a firmware in filesystem
or in the kernel image can lead to a kernel NULL pointer deference.
Watchdog need to be stopped in brcmf_sdio_remove

The system is going down NOW!
[ 1348.110759] Unable to handle kernel NULL pointer dereference at virtual address 000002f8
Sent SIGTERM to all processes
[ 1348.121412] Mem abort info:
[ 1348.126962]   ESR = 0x96000004
[ 1348.130023]   Exception class = DABT (current EL), IL = 32 bits
[ 1348.135948]   SET = 0, FnV = 0
[ 1348.138997]   EA = 0, S1PTW = 0
[ 1348.142154] Data abort info:
[ 1348.145045]   ISV = 0, ISS = 0x00000004
[ 1348.148884]   CM = 0, WnR = 0
[ 1348.151861] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____)
[ 1348.158475] [00000000000002f8] pgd=0000000000000000
[ 1348.163364] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 1348.168927] Modules linked in: ipv6
[ 1348.172421] CPU: 3 PID: 1421 Comm: brcmf_wdog/mmc0 Not tainted 4.17.0-rc5-next-20180517 #18
[ 1348.180757] Hardware name: Amarula A64-Relic (DT)
[ 1348.185455] pstate: 60000005 (nZCv daif -PAN -UAO)
[ 1348.190251] pc : brcmf_sdiod_freezer_count+0x0/0x20
[ 1348.195124] lr : brcmf_sdio_watchdog_thread+0x64/0x290
[ 1348.200253] sp : ffff00000b85be30
[ 1348.203561] x29: ffff00000b85be30 x28: 0000000000000000
[ 1348.208868] x27: ffff00000b6cb918 x26: ffff80003b990638
[ 1348.214176] x25: ffff0000087b1a20 x24: ffff80003b94f800
[ 1348.219483] x23: ffff000008e620c8 x22: ffff000008f0b660
[ 1348.224790] x21: ffff000008c6a858 x20: 00000000fffffe00
[ 1348.230097] x19: ffff80003b94f800 x18: 0000000000000001
[ 1348.235404] x17: 0000ffffab2e8a74 x16: ffff0000080d7de8
[ 1348.240711] x15: 0000000000000000 x14: 0000000000000400
[ 1348.246018] x13: 0000000000000400 x12: 0000000000000001
[ 1348.251324] x11: 00000000000002c4 x10: 0000000000000a10
[ 1348.256631] x9 : ffff00000b85bc40 x8 : ffff80003be11870
[ 1348.261937] x7 : ffff80003dfc7308 x6 : 000000078ff08b55
[ 1348.267243] x5 : 00000139e1058400 x4 : 0000000000000000
[ 1348.272550] x3 : dead000000000100 x2 : 958f2788d6618100
[ 1348.277856] x1 : 00000000fffffe00 x0 : 0000000000000000

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Tested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/brcm80211/brcmfmac/sdio.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
index 7e74ac3ad815..35f62b00f1df 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
@@ -4291,6 +4291,13 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus)
 	brcmf_dbg(TRACE, "Enter\n");
 
 	if (bus) {
+		/* Stop watchdog task */
+		if (bus->watchdog_tsk) {
+			send_sig(SIGTERM, bus->watchdog_tsk, 1);
+			kthread_stop(bus->watchdog_tsk);
+			bus->watchdog_tsk = NULL;
+		}
+
 		/* De-register interrupt handler */
 		brcmf_sdiod_intr_unregister(bus->sdiodev);
 

From 27edb6791ce2ccf1345940b9dec72d4617dcf07c Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Sun, 17 Jun 2018 13:53:09 +0200
Subject: [PATCH 378/519] ARM: dts: am437x: make edt-ft5x06 a wakeup source

[ Upstream commit 49a6ec5b807ea4ad7ebe1f58080ebb8497cb2d2c ]

The touchscreen driver no longer configures the device as wakeup source by
default. A "wakeup-source" property is needed.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/am437x-sk-evm.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 63de2a1b4315..648236c5281b 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -508,6 +508,8 @@ edt-ft5306@38 {
 
 		touchscreen-size-x = <480>;
 		touchscreen-size-y = <272>;
+
+		wakeup-source;
 	};
 
 	tlv320aic3106: tlv320aic3106@1b {

From 1748d3f1e3fcb2d125d111cace612e1e39b665cf Mon Sep 17 00:00:00 2001
From: Ajay Gupta <ajaykuee@gmail.com>
Date: Thu, 21 Jun 2018 16:19:45 +0300
Subject: [PATCH 379/519] usb: xhci: increase CRS timeout value

[ Upstream commit 305886ca87be480ae159908c2affd135c04215cf ]

Some controllers take almost 55ms to complete controller
restore state (CRS).
There is no timeout limit mentioned in xhci specification so
fixing the issue by increasing the timeout limit to 100ms

[reformat code comment -Mathias]
Signed-off-by: Ajay Gupta <ajaykuee@gmail.com>
Signed-off-by: Nagaraj Annaiah <naga.annaiah@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2d837b6bd495..128a3c0a9286 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1052,8 +1052,13 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 		command = readl(&xhci->op_regs->command);
 		command |= CMD_CRS;
 		writel(command, &xhci->op_regs->command);
+		/*
+		 * Some controllers take up to 55+ ms to complete the controller
+		 * restore so setting the timeout to 100ms. Xhci specification
+		 * doesn't mention any timeout value.
+		 */
 		if (xhci_handshake(&xhci->op_regs->status,
-			      STS_RESTORE, 0, 10 * 1000)) {
+			      STS_RESTORE, 0, 100 * 1000)) {
 			xhci_warn(xhci, "WARN: xHC restore state timeout\n");
 			spin_unlock_irq(&xhci->lock);
 			return -ETIMEDOUT;

From 6ba7ddb12aed6bf43e9749842785dc8cd4f4e39b Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Mon, 11 Jun 2018 09:31:53 +0200
Subject: [PATCH 380/519] perf test session topology: Fix test on s390

[ Upstream commit b930e62ecd362843002bdf84c2940439822af321 ]

On s390 this test case fails because the socket identifiction numbers
assigned to the CPU are higher than the CPU identification numbers.

F/ix this by adding the platform architecture into the perf data header
flag information. This helps identifiing the test platform and handles
s390 specifics in process_cpu_topology().

Before:

  [root@p23lp27 perf]# perf test -vvvvv -F 39
  39: Session topology                                      :
  --- start ---
  templ file: /tmp/perf-test-iUv755
  socket_id number is too big.You may need to upgrade the perf tool.
  ---- end ----
  Session topology: Skip
  [root@p23lp27 perf]#

After:

  [root@p23lp27 perf]# perf test -vvvvv -F 39
  39: Session topology                                      :
  --- start ---
  templ file: /tmp/perf-test-8X8VTs
  CPU 0, core 0, socket 6
  CPU 1, core 1, socket 3
  ---- end ----
  Session topology: Ok
  [root@p23lp27 perf]#

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Fixes: c84974ed9fb6 ("perf test: Add entry to test cpu topology")
Link: http://lkml.kernel.org/r/20180611073153.15592-2-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/tests/topology.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index f5bb096c3bd9..bf67343c7795 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -42,6 +42,7 @@ static int session_write_header(char *path)
 
 	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
 	perf_header__set_feat(&session->header, HEADER_NRCPUS);
+	perf_header__set_feat(&session->header, HEADER_ARCH);
 
 	session->header.data_size += DATA_SIZE;
 

From 7cef243ef30b20c9ba802af138e0fa1c3e5d0ebc Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.ibm.com>
Date: Mon, 11 Jun 2018 16:10:49 +0530
Subject: [PATCH 381/519] perf report powerpc: Fix crash if callchain is empty

[ Upstream commit 143c99f6ac6812d23254e80844d6e34be897d3e1 ]

For some cases, the callchain provided by the kernel may be empty. So,
the callchain ip filtering code will cause a crash if we do not check
whether the struct ip_callchain pointer is NULL before accessing any
members.

This can be observed on a powerpc64le system running Fedora 27 as shown
below.

  # perf record -b -e cycles:u ls

Before:

  # perf report --branch-history

  perf: Segmentation fault
  -------- backtrace --------
  perf[0x1027615c]
  linux-vdso64.so.1(__kernel_sigtramp_rt64+0x0)[0x7fff856304d8]
  perf(arch_skip_callchain_idx+0x44)[0x10257c58]
  perf[0x1017f2e4]
  perf(thread__resolve_callchain+0x124)[0x1017ff5c]
  perf(sample__resolve_callchain+0xf0)[0x10172788]
  ...

After:

  # perf report --branch-history

  Samples: 25  of event 'cycles:u', Event count (approx.): 2306870
    Overhead  Source:Line            Symbol                   Shared Object
  +   11.60%  _init+35736            [.] _init                ls
  +    9.84%  strcoll_l.c:137        [.] __strcoll_l          libc-2.26.so
  +    9.16%  memcpy.S:175           [.] __memcpy_power7      libc-2.26.so
  +    9.01%  gconv_charset.h:54     [.] _nl_find_locale      libc-2.26.so
  +    8.87%  dl-addr.c:52           [.] _dl_addr             libc-2.26.so
  +    8.83%  _init+236              [.] _init                ls
  ...

Reported-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Acked-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20180611104049.11048-1-sandipan@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 0c370f81e002..bd630c222e65 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -243,7 +243,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
 	u64 ip;
 	u64 skip_slot = -1;
 
-	if (chain->nr < 3)
+	if (!chain || chain->nr < 3)
 		return skip_slot;
 
 	ip = chain->ips[2];

From 9a406f22b050bd41e803ede3ced9750d6eeaf61f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 26 Jun 2018 22:17:17 -0700
Subject: [PATCH 382/519] selftests/x86/sigreturn/64: Fix spurious failures on
 AMD CPUs

[ Upstream commit ec348020566009d3da9b99f07c05814d13969c78 ]

When I wrote the sigreturn test, I didn't realize that AMD's busted
IRET behavior was different from Intel's busted IRET behavior:

On AMD CPUs, the CPU leaks the high 32 bits of the kernel stack pointer
to certain userspace contexts.  Gee, thanks.  There's very little
the kernel can do about it.  Modify the test so it passes.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/86e7fd3564497f657de30a36da4505799eebef01.1530076529.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/x86/sigreturn.c | 46 ++++++++++++++++---------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index b5aa1bab7416..97ad2d40324a 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -456,19 +456,38 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 		greg_t req = requested_regs[i], res = resulting_regs[i];
 		if (i == REG_TRAPNO || i == REG_IP)
 			continue;	/* don't care */
-		if (i == REG_SP) {
-			printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
-			       (unsigned long long)res);
 
+		if (i == REG_SP) {
 			/*
-			 * In many circumstances, the high 32 bits of rsp
-			 * are zeroed.  For example, we could be a real
-			 * 32-bit program, or we could hit any of a number
-			 * of poorly-documented IRET or segmented ESP
-			 * oddities.  If this happens, it's okay.
+			 * If we were using a 16-bit stack segment, then
+			 * the kernel is a bit stuck: IRET only restores
+			 * the low 16 bits of ESP/RSP if SS is 16-bit.
+			 * The kernel uses a hack to restore bits 31:16,
+			 * but that hack doesn't help with bits 63:32.
+			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
+			 * AMD CPUs, they leak the high bits of the kernel
+			 * espfix64 stack pointer.  There's very little that
+			 * the kernel can do about it.
+			 *
+			 * Similarly, if we are returning to a 32-bit context,
+			 * the CPU will often lose the high 32 bits of RSP.
 			 */
-			if (res == (req & 0xFFFFFFFF))
-				continue;  /* OK; not expected to work */
+
+			if (res == req)
+				continue;
+
+			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+				printf("[NOTE]\tSP: %llx -> %llx\n",
+				       (unsigned long long)req,
+				       (unsigned long long)res);
+				continue;
+			}
+
+			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+			continue;
 		}
 
 		bool ignore_reg = false;
@@ -507,13 +526,6 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 		}
 
 		if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
-			/*
-			 * SP is particularly interesting here.  The
-			 * usual cause of failures is that we hit the
-			 * nasty IRET case of returning to a 16-bit SS,
-			 * in which case bits 16:31 of the *kernel*
-			 * stack pointer persist in ESP.
-			 */
 			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
 			       i, (unsigned long long)requested_regs[i],
 			       (unsigned long long)resulting_regs[i]);

From d238b2e06f390de4a3a15058a9e0c538fb6e35e8 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 5 Jun 2018 15:37:51 +0530
Subject: [PATCH 383/519] ARM: dts: da850: Fix interrups property for gpio

[ Upstream commit 3eb1b955cd7ed1e621ace856710006c2a8a7f231 ]

The intc #interrupt-cells is equal to 1. Currently gpio
node has 2 cells per IRQ which is wrong. Remove the additional
cell for each of the interrupts.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Fixes: 2e38b946dc54 ("ARM: davinci: da850: add GPIO DT node")
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/da850.dtsi | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi
index 0bd98cd00816..4ef5c3410fcc 100644
--- a/arch/arm/boot/dts/da850.dtsi
+++ b/arch/arm/boot/dts/da850.dtsi
@@ -267,11 +267,7 @@ gpio: gpio@1e26000 {
 			compatible = "ti,dm6441-gpio";
 			gpio-controller;
 			reg = <0x226000 0x1000>;
-			interrupts = <42 IRQ_TYPE_EDGE_BOTH
-				43 IRQ_TYPE_EDGE_BOTH 44 IRQ_TYPE_EDGE_BOTH
-				45 IRQ_TYPE_EDGE_BOTH 46 IRQ_TYPE_EDGE_BOTH
-				47 IRQ_TYPE_EDGE_BOTH 48 IRQ_TYPE_EDGE_BOTH
-				49 IRQ_TYPE_EDGE_BOTH 50 IRQ_TYPE_EDGE_BOTH>;
+			interrupts = <42 43 44 45 46 47 48 49 50>;
 			ti,ngpio = <144>;
 			ti,davinci-gpio-unbanked = <0>;
 			status = "disabled";

From 0430caf5ccc880dfacba544accbddc839d70ded1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 22 Jun 2018 14:15:47 +0300
Subject: [PATCH 384/519] dmaengine: k3dma: Off by one in
 k3_of_dma_simple_xlate()

[ Upstream commit c4c2b7644cc9a41f17a8cc8904efe3f66ae4c7ed ]

The d->chans[] array has d->dma_requests elements so the > should be
>= here.

Fixes: 8e6152bc660e ("dmaengine: Add hisilicon k3 DMA engine driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/k3dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index 1ba2fd73852d..0f0c06ab414b 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -660,7 +660,7 @@ static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 	struct k3_dma_dev *d = ofdma->of_dma_data;
 	unsigned int request = dma_spec->args[0];
 
-	if (request > d->dma_requests)
+	if (request >= d->dma_requests)
 		return NULL;
 
 	return dma_get_slave_channel(&(d->chans[request].vc.chan));

From 66de11067753fbc562b7d2dba550d563c02449ec Mon Sep 17 00:00:00 2001
From: BingJing Chang <bingjingc@synology.com>
Date: Thu, 28 Jun 2018 18:40:11 +0800
Subject: [PATCH 385/519] md/raid10: fix that replacement cannot complete
 recovery after reassemble

[ Upstream commit bda3153998f3eb2cafa4a6311971143628eacdbc ]

During assemble, the spare marked for replacement is not checked.
conf->fullsync cannot be updated to be 1. As a result, recovery will
treat it as a clean array. All recovering sectors are skipped. Original
device is replaced with the not-recovered spare.

mdadm -C /dev/md0 -l10 -n4 -pn2 /dev/loop[0123]
mdadm /dev/md0 -a /dev/loop4
mdadm /dev/md0 --replace /dev/loop0
mdadm -S /dev/md0 # stop array during recovery

mdadm -A /dev/md0 /dev/loop[01234]

After reassemble, you can see recovery go on, but it completes
immediately. In fact, recovery is not actually processed.

To solve this problem, we just add the missing logics for replacment
spares. (In raid1.c or raid5.c, they have already been checked.)

Reported-by: Alex Chen <alexchen@synology.com>
Reviewed-by: Alex Wu <alexwu@synology.com>
Reviewed-by: Chung-Chiang Cheng <cccheng@synology.com>
Signed-off-by: BingJing Chang <bingjingc@synology.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid10.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7b6acedc89c1..8a731bdd268e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3691,6 +3691,13 @@ static int run(struct mddev *mddev)
 			    disk->rdev->saved_raid_disk < 0)
 				conf->fullsync = 1;
 		}
+
+		if (disk->replacement &&
+		    !test_bit(In_sync, &disk->replacement->flags) &&
+		    disk->replacement->saved_raid_disk < 0) {
+			conf->fullsync = 1;
+		}
+
 		disk->recovery_disabled = mddev->recovery_disabled - 1;
 	}
 

From 048115cfba050e34667d7e3d05ce5f09fd54524f Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:13 +0200
Subject: [PATCH 386/519] drm/exynos: gsc: Fix support for NV16/61,
 YUV420/YVU420 and YUV422 modes

[ Upstream commit dd209ef809080ced903e7747ee3ef640c923a1d2 ]

Fix following issues related to planar YUV pixel format configuration:
- NV16/61 modes were incorrectly programmed as NV12/21,
- YVU420 was programmed as YUV420 on source,
- YVU420 and YUV422 were programmed as YUV420 on output.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 +++++++++++++++++--------
 drivers/gpu/drm/exynos/regs-gsc.h       |  1 +
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 11b87d2a7913..ba69d1c72221 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -526,21 +526,25 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt)
 			GSC_IN_CHROMA_ORDER_CRCB);
 		break;
 	case DRM_FORMAT_NV21:
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_2P);
+		break;
 	case DRM_FORMAT_NV61:
-		cfg |= (GSC_IN_CHROMA_ORDER_CRCB |
-			GSC_IN_YUV420_2P);
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV422_2P);
 		break;
 	case DRM_FORMAT_YUV422:
 		cfg |= GSC_IN_YUV422_3P;
 		break;
 	case DRM_FORMAT_YUV420:
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_3P);
+		break;
 	case DRM_FORMAT_YVU420:
-		cfg |= GSC_IN_YUV420_3P;
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_3P);
 		break;
 	case DRM_FORMAT_NV12:
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P);
+		break;
 	case DRM_FORMAT_NV16:
-		cfg |= (GSC_IN_CHROMA_ORDER_CBCR |
-			GSC_IN_YUV420_2P);
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV422_2P);
 		break;
 	default:
 		dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
@@ -800,18 +804,25 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
 			GSC_OUT_CHROMA_ORDER_CRCB);
 		break;
 	case DRM_FORMAT_NV21:
-	case DRM_FORMAT_NV61:
 		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_2P);
 		break;
+	case DRM_FORMAT_NV61:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV422_2P);
+		break;
 	case DRM_FORMAT_YUV422:
+		cfg |= GSC_OUT_YUV422_3P;
+		break;
 	case DRM_FORMAT_YUV420:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_3P);
+		break;
 	case DRM_FORMAT_YVU420:
-		cfg |= GSC_OUT_YUV420_3P;
+		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_3P);
 		break;
 	case DRM_FORMAT_NV12:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P);
+		break;
 	case DRM_FORMAT_NV16:
-		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR |
-			GSC_OUT_YUV420_2P);
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV422_2P);
 		break;
 	default:
 		dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h
index 9ad592707aaf..ade10966d6af 100644
--- a/drivers/gpu/drm/exynos/regs-gsc.h
+++ b/drivers/gpu/drm/exynos/regs-gsc.h
@@ -138,6 +138,7 @@
 #define GSC_OUT_YUV420_3P		(3 << 4)
 #define GSC_OUT_YUV422_1P		(4 << 4)
 #define GSC_OUT_YUV422_2P		(5 << 4)
+#define GSC_OUT_YUV422_3P		(6 << 4)
 #define GSC_OUT_YUV444			(7 << 4)
 #define GSC_OUT_TILE_TYPE_MASK		(1 << 2)
 #define GSC_OUT_TILE_C_16x8		(0 << 2)

From c46030269f686fc3ca92d93140c9f2957aefee3b Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:07:40 +0200
Subject: [PATCH 387/519] drm/exynos: decon5433: Fix per-plane global alpha for
 XRGB modes

[ Upstream commit ab337fc274a1957ff0771f19e826c736253f7c39 ]

Set per-plane global alpha to maximum value to get proper blending of
XRGB and ARGB planes. This fixes the strange order of overlapping planes.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 34cebcdc2fc4..784ef54f5a1b 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -278,8 +278,8 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc,
 		COORDINATE_Y(plane->crtc_y + plane->crtc_h - 1);
 	writel(val, ctx->addr + DECON_VIDOSDxB(win));
 
-	val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) |
-		VIDOSD_Wx_ALPHA_B_F(0x0);
+	val = VIDOSD_Wx_ALPHA_R_F(0xff) | VIDOSD_Wx_ALPHA_G_F(0xff) |
+		VIDOSD_Wx_ALPHA_B_F(0xff);
 	writel(val, ctx->addr + DECON_VIDOSDxC(win));
 
 	val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) |

From acc83070ba75b3ab93bf46f711246e9b97ed46c0 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:07:49 +0200
Subject: [PATCH 388/519] drm/exynos: decon5433: Fix WINCONx reset value

[ Upstream commit 7b7aa62c05eac9789c208b946f515983a9255d8d ]

The only bits that should be preserved in decon_win_set_fmt() is
WINCONx_ENWIN_F. All other bits depends on the selected pixel formats and
are set by the mentioned function.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 784ef54f5a1b..9cae5f69b07c 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -190,7 +190,7 @@ static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win,
 	unsigned long val;
 
 	val = readl(ctx->addr + DECON_WINCONx(win));
-	val &= ~WINCONx_BPPMODE_MASK;
+	val &= WINCONx_ENWIN_F;
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_XRGB1555:

From aba71e6a936a62126d0c084d4add455db697ee24 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Thu, 28 Jun 2018 04:52:15 -0700
Subject: [PATCH 389/519] bnx2x: Fix receiving tx-timeout in error or recovery
 state.

[ Upstream commit 484c016d9392786ce5c74017c206c706f29f823d ]

Driver performs the internal reload when it receives tx-timeout event from
the OS. Internal reload might fail in some scenarios e.g., fatal HW issues.
In such cases OS still see the link, which would result in undesirable
functionalities such as re-generation of tx-timeouts.
The patch addresses this issue by indicating the link-down to OS when
tx-timeout is detected, and keeping the link in down state till the
internal reload is successful.

Please consider applying it to 'net' branch.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  | 6 ++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6 ++++++
 3 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index b5e64b02200c..1ea068815419 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1634,6 +1634,7 @@ struct bnx2x {
 	struct link_vars	link_vars;
 	u32			link_cnt;
 	struct bnx2x_link_report_data last_reported_link;
+	bool			force_link_down;
 
 	struct mdio_if_info	mdio;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 949a82458a29..ebc4518d598a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1277,6 +1277,11 @@ void __bnx2x_link_report(struct bnx2x *bp)
 {
 	struct bnx2x_link_report_data cur_data;
 
+	if (bp->force_link_down) {
+		bp->link_vars.link_up = 0;
+		return;
+	}
+
 	/* reread mf_cfg */
 	if (IS_PF(bp) && !CHIP_IS_E1(bp))
 		bnx2x_read_mf_cfg(bp);
@@ -2840,6 +2845,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		bp->pending_max = 0;
 	}
 
+	bp->force_link_down = false;
 	if (bp->port.pmf) {
 		rc = bnx2x_initial_phy_init(bp, load_mode);
 		if (rc)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 8ddb68a3fdb6..403fa8d98aa3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10222,6 +10222,12 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 		bp->sp_rtnl_state = 0;
 		smp_mb();
 
+		/* Immediately indicate link as down */
+		bp->link_vars.link_up = 0;
+		bp->force_link_down = true;
+		netif_carrier_off(bp->dev);
+		BNX2X_ERR("Indicating link is down due to Tx-timeout\n");
+
 		bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
 		bnx2x_nic_load(bp, LOAD_NORMAL);
 

From 8f2f46791e28b7058a32fb7eab32e498ff838627 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@linux-m68k.org>
Date: Mon, 18 Jun 2018 15:34:14 +1000
Subject: [PATCH 390/519] m68k: fix "bad page state" oops on ColdFire boot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit ecd60532e060e45c63c57ecf1c8549b1d656d34d ]

Booting a ColdFire m68k core with MMU enabled causes a "bad page state"
oops since commit 1d40a5ea01d5 ("mm: mark pages in use for page tables"):

 BUG: Bad page state in process sh  pfn:01ce2
 page:004fefc8 count:0 mapcount:-1024 mapping:00000000 index:0x0
 flags: 0x0()
 raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000
 raw: 039c4000
 page dumped because: nonzero mapcount
 Modules linked in:
 CPU: 0 PID: 22 Comm: sh Not tainted 4.17.0-07461-g1d40a5ea01d5 #13

Fix by calling pgtable_page_dtor() in our __pte_free_tlb() code path,
so that the PG_table flag is cleared before we free the pte page.

Note that I had to change the type of pte_free() to be static from
extern. Otherwise you get a lot of warnings like this:

./arch/m68k/include/asm/mcf_pgalloc.h:80:2: warning: ‘pgtable_page_dtor’ is static but used in inline function ‘pte_free’ which is not static
  pgtable_page_dtor(page);
  ^

And making it static is consistent with our use of this in the other
m68k pgalloc definitions of pte_free().

Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
CC: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/include/asm/mcf_pgalloc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index f9924fbcfe42..456e3f75ef3b 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -43,6 +43,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 				  unsigned long address)
 {
+	pgtable_page_dtor(page);
 	__free_page(page);
 }
 
@@ -73,8 +74,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return page;
 }
 
-extern inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
+	pgtable_page_dtor(page);
 	__free_page(page);
 }
 

From 1bdab67ddfa7b4e9e7a90637a22f9abc6ca88cf4 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Tue, 26 Jun 2018 09:58:02 -0700
Subject: [PATCH 391/519] HID: wacom: Correct touch maximum XY of 2nd-gen
 Intuos

[ Upstream commit 3b8d573586d1b9dee33edf6cb6f2ca05f4bca568 ]

The touch sensors on the 2nd-gen Intuos tablets don't use a 4096x4096
sensor like other similar tablets (3rd-gen Bamboo, Intuos5, etc.).
The incorrect maximum XY values don't normally affect userspace since
touch input from these devices is typically relative rather than
absolute. It does, however, cause problems when absolute distances
need to be measured, e.g. for gesture recognition. Since the resolution
of the touch sensor on these devices is 10 units / mm (versus 100 for
the pen sensor), the proper maximum values can be calculated by simply
dividing by 10.

Fixes: b5fd2a3e92 ("Input: wacom - add support for three new Intuos devices")
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/wacom_wac.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index b62c50d1b1e4..b184956bd430 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2487,8 +2487,14 @@ void wacom_setup_device_quirks(struct wacom *wacom)
 			if (features->type >= INTUOSHT && features->type <= BAMBOO_PT)
 				features->device_type |= WACOM_DEVICETYPE_PAD;
 
-			features->x_max = 4096;
-			features->y_max = 4096;
+			if (features->type == INTUOSHT2) {
+				features->x_max = features->x_max / 10;
+				features->y_max = features->y_max / 10;
+			}
+			else {
+				features->x_max = 4096;
+				features->y_max = 4096;
+			}
 		}
 		else if (features->pktlen == WACOM_PKGLEN_BBTOUCH) {
 			features->device_type |= WACOM_DEVICETYPE_PAD;

From 0d0af17ae83d6feb29d676c72423461419df5110 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Mon, 25 Jun 2018 09:34:03 -0300
Subject: [PATCH 392/519] ARM: imx_v6_v7_defconfig: Select ULPI support

[ Upstream commit 157bcc06094c3c5800d3f4676527047b79b618e7 ]

Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that
USB ULPI can be functional on some boards like imx51-babbge.

This fixes a kernel hang in 4.18-rc1 on i.mx51-babbage, caused by commit
03e6275ae381 ("usb: chipidea: Fix ULPI on imx51").

Suggested-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/configs/imx_v6_v7_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 4187f69f6630..b3490c1c49d1 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -261,6 +261,7 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
+CONFIG_USB_CHIPIDEA_ULPI=y
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_GENERIC=y
 CONFIG_USB_SERIAL_FTDI_SIO=m
@@ -287,6 +288,7 @@ CONFIG_USB_G_NCM=m
 CONFIG_USB_GADGETFS=m
 CONFIG_USB_MASS_STORAGE=m
 CONFIG_USB_G_SERIAL=m
+CONFIG_USB_ULPI_BUS=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y

From be38b9556d9ba051adae074367acb3ee362180b2 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Tue, 26 Jun 2018 08:37:09 -0300
Subject: [PATCH 393/519] ARM: imx_v4_v5_defconfig: Select ULPI support

[ Upstream commit 2ceb2780b790b74bc408a949f6aedbad8afa693e ]

Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that
USB ULPI can be functional on some boards like that use ULPI
interface.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/configs/imx_v4_v5_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index d3a8018639de..f4a2d28936e1 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -145,9 +145,11 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
+CONFIG_USB_CHIPIDEA_ULPI=y
 CONFIG_NOP_USB_XCEIV=y
 CONFIG_USB_GADGET=y
 CONFIG_USB_ETH=m
+CONFIG_USB_ULPI_BUS=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y

From bfb1c3470bcb05537fca601a0101d759d054b822 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 8 Mar 2018 21:58:43 +0100
Subject: [PATCH 394/519] tracing: Use __printf markup to silence compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 26b68dd2f48fe7699a89f0cfbb9f4a650dc1c837 ]

Silence warnings (triggered at W=1) by adding relevant __printf attributes.

  CC      kernel/trace/trace.o
kernel/trace/trace.c: In function ‘__trace_array_vprintk’:
kernel/trace/trace.c:2979:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format]
  len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
  ^~~
  AR      kernel/trace/built-in.o

Link: http://lkml.kernel.org/r/20180308205843.27447-1-malat@debian.org

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1b980a8ef791..11761b3dd7ba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2186,6 +2186,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 }
 EXPORT_SYMBOL_GPL(trace_vbprintk);
 
+__printf(3, 0)
 static int
 __trace_array_vprintk(struct ring_buffer *buffer,
 		      unsigned long ip, const char *fmt, va_list args)
@@ -2236,12 +2237,14 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 	return len;
 }
 
+__printf(3, 0)
 int trace_array_vprintk(struct trace_array *tr,
 			unsigned long ip, const char *fmt, va_list args)
 {
 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
 }
 
+__printf(3, 0)
 int trace_array_printk(struct trace_array *tr,
 		       unsigned long ip, const char *fmt, ...)
 {
@@ -2257,6 +2260,7 @@ int trace_array_printk(struct trace_array *tr,
 	return ret;
 }
 
+__printf(3, 4)
 int trace_array_printk_buf(struct ring_buffer *buffer,
 			   unsigned long ip, const char *fmt, ...)
 {
@@ -2272,6 +2276,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer,
 	return ret;
 }
 
+__printf(2, 0)
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	return trace_array_vprintk(&global_trace, ip, fmt, args);

From 1acb2ad5d9d0fc66f18c74e22af3c07e41a5dbca Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 3 Jul 2018 17:02:46 -0700
Subject: [PATCH 395/519] kasan: fix shadow_size calculation error in
 kasan_module_alloc

[ Upstream commit 1e8e18f694a52d703665012ca486826f64bac29d ]

There is a special case that the size is "(N << KASAN_SHADOW_SCALE_SHIFT)
Pages plus X", the value of X is [1, KASAN_SHADOW_SCALE_SIZE-1].  The
operation "size >> KASAN_SHADOW_SCALE_SHIFT" will drop X, and the
roundup operation can not retrieve the missed one page.  For example:
size=0x28006, PAGE_SIZE=0x1000, KASAN_SHADOW_SCALE_SHIFT=3, we will get
shadow_size=0x5000, but actually we need 6 pages.

  shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE);

This can lead to a kernel crash when kasan is enabled and the value of
mod->core_layout.size or mod->init_layout.size is like above.  Because
the shadow memory of X has not been allocated and mapped.

move_module:
  ptr = module_alloc(mod->core_layout.size);
  ...
  memset(ptr, 0, mod->core_layout.size);		//crashed

  Unable to handle kernel paging request at virtual address ffff0fffff97b000
  ......
  Call trace:
    __asan_storeN+0x174/0x1a8
    memset+0x24/0x48
    layout_and_allocate+0xcd8/0x1800
    load_module+0x190/0x23e8
    SyS_finit_module+0x148/0x180

Link: http://lkml.kernel.org/r/1529659626-12660-1-git-send-email-thunder.leizhen@huawei.com
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Dmitriy Vyukov <dvyukov@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Libin <huawei.libin@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/kasan/kasan.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index ba9adce1422a..b7397b459960 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -427,12 +427,13 @@ void kasan_kfree_large(const void *ptr)
 int kasan_module_alloc(void *addr, size_t size)
 {
 	void *ret;
+	size_t scaled_size;
 	size_t shadow_size;
 	unsigned long shadow_start;
 
 	shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
-	shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
-			PAGE_SIZE);
+	scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT;
+	shadow_size = round_up(scaled_size, PAGE_SIZE);
 
 	if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
 		return -EINVAL;

From c7fda06308d6d1ed5d094a5f22b3e1e33852edbf Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Tue, 3 Jul 2018 11:21:46 -0400
Subject: [PATCH 396/519] smsc75xx: Add workaround for gigabit link up hardware
 errata.

[ Upstream commit d461e3da905332189aad546b2ad9adbe6071c7cc ]

In certain conditions, the device may not be able to link in gigabit mode. This software workaround ensures that the device will not enter the failure state.

Fixes: d0cad871703b898a442e4049c532ec39168e5b57 ("SMSC75XX USB 2.0 Gigabit Ethernet Devices")
Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/smsc75xx.c | 62 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 7337e6c0e126..478937418a33 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -81,6 +81,9 @@ static bool turbo_mode = true;
 module_param(turbo_mode, bool, 0644);
 MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
 
+static int smsc75xx_link_ok_nopm(struct usbnet *dev);
+static int smsc75xx_phy_gig_workaround(struct usbnet *dev);
+
 static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index,
 					    u32 *data, int in_pm)
 {
@@ -840,6 +843,9 @@ static int smsc75xx_phy_initialize(struct usbnet *dev)
 		return -EIO;
 	}
 
+	/* phy workaround for gig link */
+	smsc75xx_phy_gig_workaround(dev);
+
 	smsc75xx_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
 		ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP |
 		ADVERTISE_PAUSE_ASYM);
@@ -978,6 +984,62 @@ static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm)
 	return -EIO;
 }
 
+static int smsc75xx_phy_gig_workaround(struct usbnet *dev)
+{
+	struct mii_if_info *mii = &dev->mii;
+	int ret = 0, timeout = 0;
+	u32 buf, link_up = 0;
+
+	/* Set the phy in Gig loopback */
+	smsc75xx_mdio_write(dev->net, mii->phy_id, MII_BMCR, 0x4040);
+
+	/* Wait for the link up */
+	do {
+		link_up = smsc75xx_link_ok_nopm(dev);
+		usleep_range(10000, 20000);
+		timeout++;
+	} while ((!link_up) && (timeout < 1000));
+
+	if (timeout >= 1000) {
+		netdev_warn(dev->net, "Timeout waiting for PHY link up\n");
+		return -EIO;
+	}
+
+	/* phy reset */
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &buf);
+	if (ret < 0) {
+		netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret);
+		return ret;
+	}
+
+	buf |= PMT_CTL_PHY_RST;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, buf);
+	if (ret < 0) {
+		netdev_warn(dev->net, "Failed to write PMT_CTL: %d\n", ret);
+		return ret;
+	}
+
+	timeout = 0;
+	do {
+		usleep_range(10000, 20000);
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &buf);
+		if (ret < 0) {
+			netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n",
+				    ret);
+			return ret;
+		}
+		timeout++;
+	} while ((buf & PMT_CTL_PHY_RST) && (timeout < 100));
+
+	if (timeout >= 100) {
+		netdev_warn(dev->net, "timeout waiting for PHY Reset\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static int smsc75xx_reset(struct usbnet *dev)
 {
 	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);

From 7e8f97b07a3be3493072f1cabe888f2d770b8077 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 4 Jul 2018 20:25:32 +0200
Subject: [PATCH 397/519] netfilter: x_tables: set module owner for icmp(6)
 matches

[ Upstream commit d376bef9c29b3c65aeee4e785fffcd97ef0a9a81 ]

nft_compat relies on xt_request_find_match to increment
refcount of the module that provides the match/target.

The (builtin) icmp matches did't set the module owner so it
was possible to rmmod ip(6)tables while icmp extensions were still in use.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/netfilter/ip_tables.c  | 1 +
 net/ipv6/netfilter/ip6_tables.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 9363c1a70f16..8adb6e9ba8f5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2072,6 +2072,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = {
 		.checkentry = icmp_checkentry,
 		.proto      = IPPROTO_ICMP,
 		.family     = NFPROTO_IPV4,
+		.me	    = THIS_MODULE,
 	},
 };
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6cb9e35d23ac..96de322fe5e2 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2073,6 +2073,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = {
 		.checkentry = icmp6_checkentry,
 		.proto      = IPPROTO_ICMPV6,
 		.family     = NFPROTO_IPV6,
+		.me	    = THIS_MODULE,
 	},
 };
 

From be4691a7c58b40ddcdad5f82fb652475afc3440e Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 6 Jul 2018 22:15:00 +0200
Subject: [PATCH 398/519] ARM: pxa: irq: fix handling of ICMR registers in
 suspend/resume

[ Upstream commit 0c1049dcb4ceec640d8bd797335bcbebdcab44d2 ]

PXA3xx platforms have 56 interrupts that are stored in two ICMR
registers. The code in pxa_irq_suspend() and pxa_irq_resume() however
does a simple division by 32 which only leads to one register being
saved at suspend and restored at resume time. The NAND interrupt
setting, for instance, is lost.

Fix this by using DIV_ROUND_UP() instead.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-pxa/irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 9c10248fadcc..4e8c2116808e 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -185,7 +185,7 @@ static int pxa_irq_suspend(void)
 {
 	int i;
 
-	for (i = 0; i < pxa_internal_irq_nr / 32; i++) {
+	for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) {
 		void __iomem *base = irq_base(i);
 
 		saved_icmr[i] = __raw_readl(base + ICMR);
@@ -204,7 +204,7 @@ static void pxa_irq_resume(void)
 {
 	int i;
 
-	for (i = 0; i < pxa_internal_irq_nr / 32; i++) {
+	for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) {
 		void __iomem *base = irq_base(i);
 
 		__raw_writel(saved_icmr[i], base + ICMR);

From 691a13ac70e31e3004310bf56360ee69c62514cb Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@datenfreihafen.org>
Date: Fri, 22 Sep 2017 14:13:53 +0200
Subject: [PATCH 399/519] ieee802154: at86rf230: switch from BUG_ON() to
 WARN_ON() on problem

[ Upstream commit 20f330452ad8814f2289a589baf65e21270879a7 ]

The check is valid but it does not warrant to crash the kernel. A
WARN_ON() is good enough here.
Found by checkpatch.

Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ieee802154/at86rf230.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 0fbbba7a0cae..184185da2f8b 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -932,7 +932,7 @@ at86rf230_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
 static int
 at86rf230_ed(struct ieee802154_hw *hw, u8 *level)
 {
-	BUG_ON(!level);
+	WARN_ON(!level);
 	*level = 0xbe;
 	return 0;
 }

From 24e3a53c0d2c6be3385c5676056124b44f7c06c2 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@datenfreihafen.org>
Date: Fri, 22 Sep 2017 14:13:54 +0200
Subject: [PATCH 400/519] ieee802154: at86rf230: use __func__ macro for debug
 messages

[ Upstream commit 8a81388ec27c4c0adbdecd20e67bb5f411ab46b2 ]

Instead of having the function name hard-coded (it might change and we
forgot to update them in the debug output) we can use __func__ instead
and also shorter the line so we do not need to break it. Also fix an
extra blank line while being here.
Found by checkpatch.

Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ieee802154/at86rf230.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 184185da2f8b..f72c2967ae82 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1108,8 +1108,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw,
 	if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
 		u16 addr = le16_to_cpu(filt->short_addr);
 
-		dev_vdbg(&lp->spi->dev,
-			 "at86rf230_set_hw_addr_filt called for saddr\n");
+		dev_vdbg(&lp->spi->dev, "%s called for saddr\n", __func__);
 		__at86rf230_write(lp, RG_SHORT_ADDR_0, addr);
 		__at86rf230_write(lp, RG_SHORT_ADDR_1, addr >> 8);
 	}
@@ -1117,8 +1116,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw,
 	if (changed & IEEE802154_AFILT_PANID_CHANGED) {
 		u16 pan = le16_to_cpu(filt->pan_id);
 
-		dev_vdbg(&lp->spi->dev,
-			 "at86rf230_set_hw_addr_filt called for pan id\n");
+		dev_vdbg(&lp->spi->dev, "%s called for pan id\n", __func__);
 		__at86rf230_write(lp, RG_PAN_ID_0, pan);
 		__at86rf230_write(lp, RG_PAN_ID_1, pan >> 8);
 	}
@@ -1127,15 +1125,13 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw,
 		u8 i, addr[8];
 
 		memcpy(addr, &filt->ieee_addr, 8);
-		dev_vdbg(&lp->spi->dev,
-			 "at86rf230_set_hw_addr_filt called for IEEE addr\n");
+		dev_vdbg(&lp->spi->dev, "%s called for IEEE addr\n", __func__);
 		for (i = 0; i < 8; i++)
 			__at86rf230_write(lp, RG_IEEE_ADDR_0 + i, addr[i]);
 	}
 
 	if (changed & IEEE802154_AFILT_PANC_CHANGED) {
-		dev_vdbg(&lp->spi->dev,
-			 "at86rf230_set_hw_addr_filt called for panc change\n");
+		dev_vdbg(&lp->spi->dev, "%s called for panc change\n", __func__);
 		if (filt->pan_coord)
 			at86rf230_write_subreg(lp, SR_AACK_I_AM_COORD, 1);
 		else
@@ -1239,7 +1235,6 @@ at86rf230_set_cca_mode(struct ieee802154_hw *hw,
 	return at86rf230_write_subreg(lp, SR_CCA_MODE, val);
 }
 
-
 static int
 at86rf230_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
 {

From fe9ee61f5a1b9413ad3862bfa5a63c633d84f38a Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@datenfreihafen.org>
Date: Fri, 22 Sep 2017 14:14:05 +0200
Subject: [PATCH 401/519] ieee802154: fakelb: switch from BUG_ON() to WARN_ON()
 on problem

[ Upstream commit 8f2fbc6c60ff213369e06a73610fc882a42fdf20 ]

The check is valid but it does not warrant to crash the kernel. A
WARN_ON() is good enough here.
Found by checkpatch.

Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ieee802154/fakelb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c
index 43617ded3773..91de25c53274 100644
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c
@@ -49,7 +49,7 @@ struct fakelb_phy {
 
 static int fakelb_hw_ed(struct ieee802154_hw *hw, u8 *level)
 {
-	BUG_ON(!level);
+	WARN_ON(!level);
 	*level = 0xbe;
 
 	return 0;

From 2cb585f9c5d6b70bfcd12beb314d9ba060c3208a Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Sun, 24 Jun 2018 14:35:10 +0100
Subject: [PATCH 402/519] drm/armada: fix colorkey mode property

[ Upstream commit d378859a667edc99e3473704847698cae97ca2b1 ]

The colorkey mode property was not correctly disabling the colorkeying
when "disabled" mode was selected.  Arrange for this to work as one
would expect.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/armada/armada_hw.h      |  1 +
 drivers/gpu/drm/armada/armada_overlay.c | 30 ++++++++++++++++++-------
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/armada/armada_hw.h b/drivers/gpu/drm/armada/armada_hw.h
index 27319a8335e2..345dc4d0851e 100644
--- a/drivers/gpu/drm/armada/armada_hw.h
+++ b/drivers/gpu/drm/armada/armada_hw.h
@@ -160,6 +160,7 @@ enum {
 	CFG_ALPHAM_GRA		= 0x1 << 16,
 	CFG_ALPHAM_CFG		= 0x2 << 16,
 	CFG_ALPHA_MASK		= 0xff << 8,
+#define CFG_ALPHA(x)		((x) << 8)
 	CFG_PIXCMD_MASK		= 0xff,
 };
 
diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c
index 5c22b380f8f3..f8a69ec63550 100644
--- a/drivers/gpu/drm/armada/armada_overlay.c
+++ b/drivers/gpu/drm/armada/armada_overlay.c
@@ -27,6 +27,7 @@ struct armada_ovl_plane_properties {
 	uint16_t contrast;
 	uint16_t saturation;
 	uint32_t colorkey_mode;
+	uint32_t colorkey_enable;
 };
 
 struct armada_ovl_plane {
@@ -62,11 +63,13 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop,
 	writel_relaxed(0x00002000, dcrtc->base + LCD_SPU_CBSH_HUE);
 
 	spin_lock_irq(&dcrtc->irq_lock);
-	armada_updatel(prop->colorkey_mode | CFG_ALPHAM_GRA,
-		     CFG_CKMODE_MASK | CFG_ALPHAM_MASK | CFG_ALPHA_MASK,
-		     dcrtc->base + LCD_SPU_DMA_CTRL1);
-
-	armada_updatel(ADV_GRACOLORKEY, 0, dcrtc->base + LCD_SPU_ADV_REG);
+	armada_updatel(prop->colorkey_mode,
+		       CFG_CKMODE_MASK | CFG_ALPHAM_MASK | CFG_ALPHA_MASK,
+		       dcrtc->base + LCD_SPU_DMA_CTRL1);
+	if (dcrtc->variant->has_spu_adv_reg)
+		armada_updatel(prop->colorkey_enable,
+			       ADV_GRACOLORKEY | ADV_VIDCOLORKEY,
+			       dcrtc->base + LCD_SPU_ADV_REG);
 	spin_unlock_irq(&dcrtc->irq_lock);
 }
 
@@ -339,8 +342,17 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane,
 		dplane->prop.colorkey_vb |= K2B(val);
 		update_attr = true;
 	} else if (property == priv->colorkey_mode_prop) {
-		dplane->prop.colorkey_mode &= ~CFG_CKMODE_MASK;
-		dplane->prop.colorkey_mode |= CFG_CKMODE(val);
+		if (val == CKMODE_DISABLE) {
+			dplane->prop.colorkey_mode =
+				CFG_CKMODE(CKMODE_DISABLE) |
+				CFG_ALPHAM_CFG | CFG_ALPHA(255);
+			dplane->prop.colorkey_enable = 0;
+		} else {
+			dplane->prop.colorkey_mode =
+				CFG_CKMODE(val) |
+				CFG_ALPHAM_GRA | CFG_ALPHA(0);
+			dplane->prop.colorkey_enable = ADV_GRACOLORKEY;
+		}
 		update_attr = true;
 	} else if (property == priv->brightness_prop) {
 		dplane->prop.brightness = val - 256;
@@ -469,7 +481,9 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs)
 	dplane->prop.colorkey_yr = 0xfefefe00;
 	dplane->prop.colorkey_ug = 0x01010100;
 	dplane->prop.colorkey_vb = 0x01010100;
-	dplane->prop.colorkey_mode = CFG_CKMODE(CKMODE_RGB);
+	dplane->prop.colorkey_mode = CFG_CKMODE(CKMODE_RGB) |
+				     CFG_ALPHAM_GRA | CFG_ALPHA(0);
+	dplane->prop.colorkey_enable = ADV_GRACOLORKEY;
 	dplane->prop.brightness = 0;
 	dplane->prop.contrast = 0x4000;
 	dplane->prop.saturation = 0x4000;

From 149751b516c07eb15f9378bbed175d23589b6215 Mon Sep 17 00:00:00 2001
From: Vikas Gupta <vikas.gupta@broadcom.com>
Date: Mon, 9 Jul 2018 02:24:52 -0400
Subject: [PATCH 403/519] bnxt_en: Fix for system hang if request_irq fails

[ Upstream commit c58387ab1614f6d7fb9e244f214b61e7631421fc ]

Fix bug in the error code path when bnxt_request_irq() returns failure.
bnxt_disable_napi() should not be called in this error path because
NAPI has not been enabled yet.

Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9904d768a20a..4ffacafddacb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4591,7 +4591,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 		rc = bnxt_request_irq(bp);
 		if (rc) {
 			netdev_err(bp->dev, "bnxt_request_irq err: %x\n", rc);
-			goto open_err;
+			goto open_err_irq;
 		}
 	}
 
@@ -4629,6 +4629,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 
 open_err:
 	bnxt_disable_napi(bp);
+
+open_err_irq:
 	bnxt_del_napi(bp);
 
 open_err_free_mem:

From c0cd6f4de95a8fee74131bac79c444f8120c93e9 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Fri, 29 Jun 2018 12:46:52 -0500
Subject: [PATCH 404/519] perf llvm-utils: Remove bashism from kernel include
 fetch script

[ Upstream commit f6432b9f65001651412dbc3589d251534822d4ab ]

Like system(), popen() calls /bin/sh, which may/may not be bash.

Script when run on dash and encounters the line, yields:

 exit: Illegal number: -1

checkbashisms report on script content:

 possible bashism (exit|return with negative status code):
 exit -1

Remove the bashism and use the more portable non-zero failure
status code 1.

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20180629124652.8d0af7e2281fd3fd8262cacc@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/llvm-utils.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 00724d496d38..62f6d7dc2dda 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -254,16 +254,16 @@ static const char *kinc_fetch_script =
 "#!/usr/bin/env sh\n"
 "if ! test -d \"$KBUILD_DIR\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "TMPDIR=`mktemp -d`\n"
 "if test -z \"$TMPDIR\"\n"
 "then\n"
-"    exit -1\n"
+"    exit 1\n"
 "fi\n"
 "cat << EOF > $TMPDIR/Makefile\n"
 "obj-y := dummy.o\n"

From 97d53c81980eaba74690868efd3160fb635b8d42 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 10 Jul 2018 08:22:40 +0100
Subject: [PATCH 405/519] ARM: 8780/1: ftrace: Only set kernel memory back to
 read-only after boot

[ Upstream commit b4c7e2bd2eb4764afe3af9409ff3b1b87116fa30 ]

Dynamic ftrace requires modifying the code segments that are usually
set to read-only. To do this, a per arch function is called both before
and after the ftrace modifications are performed. The "before" function
will set kernel code text to read-write to allow for ftrace to make the
modifications, and the "after" function will set the kernel code text
back to "read-only" to keep the kernel code text protected.

The issue happens when dynamic ftrace is tested at boot up. The test is
done before the kernel code text has been set to read-only. But the
"before" and "after" calls are still performed. The "after" call will
change the kernel code text to read-only prematurely, and other boot
code that expects this code to be read-write will fail.

The solution is to add a variable that is set when the kernel code text
is expected to be converted to read-only, and make the ftrace "before"
and "after" calls do nothing if that variable is not yet set. This is
similar to the x86 solution from commit 162396309745 ("ftrace, x86:
make kernel text writable only for conversions").

Link: http://lkml.kernel.org/r/20180620212906.24b7b66e@vmware.local.home

Reported-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index c29ad610311b..a9f6705aea23 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -716,19 +716,28 @@ int __mark_rodata_ro(void *unused)
 	return 0;
 }
 
+static int kernel_set_to_readonly __read_mostly;
+
 void mark_rodata_ro(void)
 {
+	kernel_set_to_readonly = 1;
 	stop_machine(__mark_rodata_ro, NULL, NULL);
 }
 
 void set_kernel_text_rw(void)
 {
+	if (!kernel_set_to_readonly)
+		return;
+
 	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
 				current->active_mm);
 }
 
 void set_kernel_text_ro(void)
 {
+	if (!kernel_set_to_readonly)
+		return;
+
 	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
 				current->active_mm);
 }

From bcfa7262bbc0cf7b39ac112ae2ece9f9310ae4d9 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Wed, 11 Jul 2018 12:54:54 -0500
Subject: [PATCH 406/519] ARM: dts: am3517.dtsi: Disable reference to OMAP3 OTG
 controller

[ Upstream commit 923847413f7316b5ced3491769b3fefa6c56a79a ]

The AM3517 has a different OTG controller location than the OMAP3,
which is included from omap3.dtsi.  This results in a hwmod error.
Since the AM3517 has a different OTG controller address, this patch
disabes one that is isn't available.

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/am3517.dtsi | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi
index 5e3f5e86ffcf..cfcbf5baba4f 100644
--- a/arch/arm/boot/dts/am3517.dtsi
+++ b/arch/arm/boot/dts/am3517.dtsi
@@ -74,6 +74,11 @@ omap3_pmx_core2: pinmux@480025d8 {
 	};
 };
 
+/* Table Table 5-79 of the TRM shows 480ab000 is reserved */
+&usb_otg_hs {
+	status = "disabled";
+};
+
 &iva {
 	status = "disabled";
 };

From 8ab85f3dc1b45f9189b62c97c82c7e6e1a3de569 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 18 Jun 2018 12:02:00 -0400
Subject: [PATCH 407/519] ixgbe: Be more careful when modifying MAC filters

[ Upstream commit d14c780c11fbc10f66c43e7b64eefe87ca442bd3 ]

This change makes it so that we are much more explicit about the ordering
of updates to the receive address register (RAR) table. Prior to this patch
I believe we may have been updating the table while entries were still
active, or possibly allowing for reordering of things since we weren't
explicitly flushing writes to either the lower or upper portion of the
register prior to accessing the other half.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 105dd00ddc1a..cd2afe92f1da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1814,7 +1814,12 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
 	if (enable_addr != 0)
 		rar_high |= IXGBE_RAH_AV;
 
+	/* Record lower 32 bits of MAC address and then make
+	 * sure that write is flushed to hardware before writing
+	 * the upper 16 bits and setting the valid bit.
+	 */
 	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low);
+	IXGBE_WRITE_FLUSH(hw);
 	IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
 
 	return 0;
@@ -1846,8 +1851,13 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
 	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
 	rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
 
-	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0);
+	/* Clear the address valid bit and upper 16 bits of the address
+	 * before clearing the lower bits. This way we aren't updating
+	 * a live filter.
+	 */
 	IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+	IXGBE_WRITE_FLUSH(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0);
 
 	/* clear VMDq pool/queue selection for this RAR */
 	hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL);

From 01a8ef2f327a6fe5075ee5027c9fa02df42c1c4e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 11 Jul 2018 12:00:45 -0400
Subject: [PATCH 408/519] packet: reset network header if packet shorter than
 ll reserved space

[ Upstream commit 993675a3100b16a4c80dfd70cbcde8ea7127b31d ]

If variable length link layer headers result in a packet shorter
than dev->hard_header_len, reset the network header offset. Else
skb->mac_len may exceed skb->len after skb_mac_reset_len.

packet_sendmsg_spkt already has similar logic.

Fixes: b84bbaf7a6c8 ("packet: in packet_snd start writing at link layer allocation")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3a63f33698d3..8de4e6620c1c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2780,6 +2780,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 			goto out_free;
 	} else if (reserve) {
 		skb_reserve(skb, -reserve);
+		if (len < reserve)
+			skb_reset_network_header(skb);
 	}
 
 	/* Returns -EFAULT on error */

From 7795ce1182d5317688750126958954e5d32e3eac Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 12 Jul 2018 15:23:45 +0300
Subject: [PATCH 409/519] qlogic: check kstrtoul() for errors

[ Upstream commit 5fc853cc01c68f84984ecc2d5fd777ecad78240f ]

We accidentally left out the error handling for kstrtoul().

Fixes: a520030e326a ("qlcnic: Implement flash sysfs callback for 83xx adapter")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index ccbb04503b27..b53a18e365c2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -1128,6 +1128,8 @@ static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp,
 	struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
 
 	ret = kstrtoul(buf, 16, &data);
+	if (ret)
+		return ret;
 
 	switch (data) {
 	case QLC_83XX_FLASH_SECTOR_ERASE_CMD:

From 43707aa8c55fb165a1a56f590e0defb198ebdde9 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jul 2018 06:04:53 -0700
Subject: [PATCH 410/519] tcp: remove DELAYED ACK events in DCTCP

[ Upstream commit a69258f7aa2623e0930212f09c586fd06674ad79 ]

After fixing the way DCTCP tracking delayed ACKs, the delayed-ACK
related callbacks are no longer needed

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h     |  2 --
 net/ipv4/tcp_dctcp.c  | 25 -------------------------
 net/ipv4/tcp_output.c |  4 ----
 3 files changed, 31 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cac4a6ad5db3..6c89238f192e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -821,8 +821,6 @@ enum tcp_ca_event {
 	CA_EVENT_LOSS,		/* loss timeout */
 	CA_EVENT_ECN_NO_CE,	/* ECT set, but not CE marked */
 	CA_EVENT_ECN_IS_CE,	/* received CE marked IP packet */
-	CA_EVENT_DELAYED_ACK,	/* Delayed ack is sent */
-	CA_EVENT_NON_DELAYED_ACK,
 };
 
 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 6300edf90e60..62f90f6b7a9d 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -55,7 +55,6 @@ struct dctcp {
 	u32 dctcp_alpha;
 	u32 next_seq;
 	u32 ce_state;
-	u32 delayed_ack_reserved;
 	u32 loss_cwnd;
 };
 
@@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk)
 
 		ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
 
-		ca->delayed_ack_reserved = 0;
 		ca->loss_cwnd = 0;
 		ca->ce_state = 0;
 
@@ -230,25 +228,6 @@ static void dctcp_state(struct sock *sk, u8 new_state)
 	}
 }
 
-static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
-{
-	struct dctcp *ca = inet_csk_ca(sk);
-
-	switch (ev) {
-	case CA_EVENT_DELAYED_ACK:
-		if (!ca->delayed_ack_reserved)
-			ca->delayed_ack_reserved = 1;
-		break;
-	case CA_EVENT_NON_DELAYED_ACK:
-		if (ca->delayed_ack_reserved)
-			ca->delayed_ack_reserved = 0;
-		break;
-	default:
-		/* Don't care for the rest. */
-		break;
-	}
-}
-
 static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 {
 	switch (ev) {
@@ -258,10 +237,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	case CA_EVENT_ECN_NO_CE:
 		dctcp_ce_state_1_to_0(sk);
 		break;
-	case CA_EVENT_DELAYED_ACK:
-	case CA_EVENT_NON_DELAYED_ACK:
-		dctcp_update_ack_reserved(sk, ev);
-		break;
 	default:
 		/* Don't care for the rest. */
 		break;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6fa749ce231f..2d3c9df8d75c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3316,8 +3316,6 @@ void tcp_send_delayed_ack(struct sock *sk)
 	int ato = icsk->icsk_ack.ato;
 	unsigned long timeout;
 
-	tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
-
 	if (ato > TCP_DELACK_MIN) {
 		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ / 2;
@@ -3374,8 +3372,6 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 	if (sk->sk_state == TCP_CLOSE)
 		return;
 
-	tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
-
 	/* We are not putting this on the write queue, so
 	 * tcp_transmit_skb() will set the ownership to this
 	 * sock.

From 9c8f268dcdd5d3dacf504873861b9f18c70021b0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 3 Jul 2018 15:30:56 +0300
Subject: [PATCH 411/519] drm/nouveau/gem: off by one bugs in
 nouveau_gem_pushbuf_reloc_apply()

[ Upstream commit 7f073d011f93e92d4d225526b9ab6b8b0bbd6613 ]

The bo array has req->nr_buffers elements so the > should be >= so we
don't read beyond the end of the array.

Fixes: a1606a9596e5 ("drm/nouveau: new gem pushbuf interface, bump to 0.0.16")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 495c279da200..ae560f5977fc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -602,7 +602,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 		struct nouveau_bo *nvbo;
 		uint32_t data;
 
-		if (unlikely(r->bo_index > req->nr_buffers)) {
+		if (unlikely(r->bo_index >= req->nr_buffers)) {
 			NV_PRINTK(err, cli, "reloc bo index invalid\n");
 			ret = -EINVAL;
 			break;
@@ -612,7 +612,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 		if (b->presumed.valid)
 			continue;
 
-		if (unlikely(r->reloc_bo_index > req->nr_buffers)) {
+		if (unlikely(r->reloc_bo_index >= req->nr_buffers)) {
 			NV_PRINTK(err, cli, "reloc container bo index invalid\n");
 			ret = -EINVAL;
 			break;

From 0821ddad494b97f0980db1877c4417e7d45c4925 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 13 Jul 2018 21:25:19 -0700
Subject: [PATCH 412/519] net/ethernet/freescale/fman: fix cross-build error

[ Upstream commit c133459765fae249ba482f62e12f987aec4376f0 ]

  CC [M]  drivers/net/ethernet/freescale/fman/fman.o
In file included from ../drivers/net/ethernet/freescale/fman/fman.c:35:
../include/linux/fsl/guts.h: In function 'guts_set_dmacr':
../include/linux/fsl/guts.h:165:2: error: implicit declaration of function 'clrsetbits_be32' [-Werror=implicit-function-declaration]
  clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift);
  ^~~~~~~~~~~~~~~

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Madalin Bucur <madalin.bucur@nxp.com>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fsl/guts.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h
index 84d971ff3fba..5d06e838e650 100644
--- a/include/linux/fsl/guts.h
+++ b/include/linux/fsl/guts.h
@@ -16,6 +16,7 @@
 #define __FSL_GUTS_H__
 
 #include <linux/types.h>
+#include <linux/io.h>
 
 /**
  * Global Utility Registers.

From 8cfe6f3afe83a2768563f718bb57c99ca249cf4c Mon Sep 17 00:00:00 2001
From: David Lechner <david@lechnology.com>
Date: Mon, 16 Jul 2018 17:58:10 -0500
Subject: [PATCH 413/519] net: usb: rtl8150: demote allmulti message to
 dev_dbg()

[ Upstream commit 3a9b0455062ffb9d2f6cd4473a76e3456f318c9f ]

This driver can spam the kernel log with multiple messages of:

    net eth0: eth0: allmulti set

Usually 4 or 8 at a time (probably because of using ConnMan).

This message doesn't seem useful, so let's demote it from dev_info()
to dev_dbg().

Signed-off-by: David Lechner <david@lechnology.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/rtl8150.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 39672984dde1..58b1e18fdd64 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -681,7 +681,7 @@ static void rtl8150_set_multicast(struct net_device *netdev)
 		   (netdev->flags & IFF_ALLMULTI)) {
 		rx_creg &= 0xfffe;
 		rx_creg |= 0x0002;
-		dev_info(&netdev->dev, "%s: allmulti set\n", netdev->name);
+		dev_dbg(&netdev->dev, "%s: allmulti set\n", netdev->name);
 	} else {
 		/* ~RX_MULTICAST, ~RX_PROMISCUOUS */
 		rx_creg &= 0x00fc;

From 8621e69878ba41ed24987a487eaf01a6505223c6 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 18 Jul 2018 08:31:43 +0200
Subject: [PATCH 414/519] net: qca_spi: Avoid packet drop during initial sync

[ Upstream commit b2bab426dc715de147f8039a3fccff27d795f4eb ]

As long as the synchronization with the QCA7000 isn't finished, we
cannot accept packets from the upper layers. So let the SPI thread
enable the TX queue after sync and avoid unwanted packet drop.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index c90ae4d4be7d..7b903af19e57 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -635,7 +635,7 @@ qcaspi_netdev_open(struct net_device *dev)
 		return ret;
 	}
 
-	netif_start_queue(qca->net_dev);
+	/* SPI thread takes care of TX queue */
 
 	return 0;
 }

From e77b1523b93cbc8863cfe656ca0c9e82f7ba43c9 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 18 Jul 2018 08:31:44 +0200
Subject: [PATCH 415/519] net: qca_spi: Make sure the QCA7000 reset is
 triggered

[ Upstream commit 711c62dfa6bdb4326ca6c587f295ea5c4f7269de ]

In case the SPI thread is not running, a simple reset of sync
state won't fix the transmit timeout. We also need to wake up the kernel
thread.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: ed7d42e24eff ("net: qca_spi: fix transmit queue timeout handling")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 7b903af19e57..25f673059c65 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -739,6 +739,9 @@ qcaspi_netdev_tx_timeout(struct net_device *dev)
 	qca->net_dev->stats.tx_errors++;
 	/* Trigger tx queue flush and QCA7000 reset */
 	qca->sync = QCASPI_SYNC_UNKNOWN;
+
+	if (qca->spi_thread)
+		wake_up_process(qca->spi_thread);
 }
 
 static int

From 780e559aaa6ae4b184d9af4acd0754f8608b3715 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 18 Jul 2018 08:31:45 +0200
Subject: [PATCH 416/519] net: qca_spi: Fix log level if probe fails

[ Upstream commit 50973993260a6934f0a00da53d9b746cfbea89ab ]

In cases the probing fails the log level of the messages should
be an error.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 25f673059c65..7886a8a5b55b 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -868,22 +868,22 @@ qca_spi_probe(struct spi_device *spi)
 
 	if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) ||
 	    (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) {
-		dev_info(&spi->dev, "Invalid clkspeed: %d\n",
-			 qcaspi_clkspeed);
+		dev_err(&spi->dev, "Invalid clkspeed: %d\n",
+			qcaspi_clkspeed);
 		return -EINVAL;
 	}
 
 	if ((qcaspi_burst_len < QCASPI_BURST_LEN_MIN) ||
 	    (qcaspi_burst_len > QCASPI_BURST_LEN_MAX)) {
-		dev_info(&spi->dev, "Invalid burst len: %d\n",
-			 qcaspi_burst_len);
+		dev_err(&spi->dev, "Invalid burst len: %d\n",
+			qcaspi_burst_len);
 		return -EINVAL;
 	}
 
 	if ((qcaspi_pluggable < QCASPI_PLUGGABLE_MIN) ||
 	    (qcaspi_pluggable > QCASPI_PLUGGABLE_MAX)) {
-		dev_info(&spi->dev, "Invalid pluggable: %d\n",
-			 qcaspi_pluggable);
+		dev_err(&spi->dev, "Invalid pluggable: %d\n",
+			qcaspi_pluggable);
 		return -EINVAL;
 	}
 
@@ -944,8 +944,8 @@ qca_spi_probe(struct spi_device *spi)
 	}
 
 	if (register_netdev(qcaspi_devs)) {
-		dev_info(&spi->dev, "Unable to register net device %s\n",
-			 qcaspi_devs->name);
+		dev_err(&spi->dev, "Unable to register net device %s\n",
+			qcaspi_devs->name);
 		free_netdev(qcaspi_devs);
 		return -EFAULT;
 	}

From 81970da69122fe4bf2af5bb1bb4c7f62d4744e79 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 17 Jul 2018 18:27:45 -0700
Subject: [PATCH 417/519] tcp: identify cryptic messages as TCP seq # bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit e56b8ce363a36fb7b74b80aaa5cc9084f2c908b4 ]

Attempt to make cryptic TCP seq number error messages clearer by
(1) identifying the source of the message as "TCP", (2) identifying the
errors as "seq # bug", and (3) grouping the field identifiers and values
by separating them with commas.

E.g., the following message is changed from:

recvmsg bug 2: copied 73BCB6CD seq 70F17CBE rcvnxt 73BCB9AA fl 0
WARNING: CPU: 2 PID: 1501 at /linux/net/ipv4/tcp.c:1881 tcp_recvmsg+0x649/0xb90

to:

TCP recvmsg seq # bug 2: copied 73BCB6CD, seq 70F17CBE, rcvnxt 73BCB9AA, fl 0
WARNING: CPU: 2 PID: 1501 at /linux/net/ipv4/tcp.c:2011 tcp_recvmsg+0x694/0xba0

Suggested-by: 積丹尼 Dan Jacobson <jidanni@jidanni.org>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a0f0a7db946b..5e162b8ab184 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1659,7 +1659,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			 * shouldn't happen.
 			 */
 			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
-				 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+				 "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
 				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
 				 flags))
 				break;
@@ -1672,7 +1672,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 				goto found_fin_ok;
 			WARN(!(flags & MSG_PEEK),
-			     "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+			     "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
 			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
 		}
 

From b84ec04bae905901f5226a67968dabc52ab0c3a6 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Tue, 21 Aug 2018 13:31:50 -0700
Subject: [PATCH 418/519] staging: android: ion: check for kref overflow

This patch is against 4.4. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogther.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ion/ion.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 374f840f31a4..47cb163da9a0 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/file.h>
@@ -387,6 +388,16 @@ static void ion_handle_get(struct ion_handle *handle)
 	kref_get(&handle->ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+					struct ion_handle *handle)
+{
+	if (atomic_read(&handle->ref.refcount) + 1 == 0)
+		return ERR_PTR(-EOVERFLOW);
+	ion_handle_get(handle);
+	return handle;
+}
+
 static int ion_handle_put_nolock(struct ion_handle *handle)
 {
 	int ret;
@@ -433,9 +444,9 @@ static struct ion_handle *ion_handle_get_by_id_nolock(struct ion_client *client,
 
 	handle = idr_find(&client->idr, id);
 	if (handle)
-		ion_handle_get(handle);
+		return ion_handle_get_check_overflow(handle);
 
-	return handle ? handle : ERR_PTR(-EINVAL);
+	return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1202,7 +1213,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd)
 	/* if a handle exists for this buffer just take a reference to it */
 	handle = ion_handle_lookup(client, buffer);
 	if (!IS_ERR(handle)) {
-		ion_handle_get(handle);
+		handle = ion_handle_get_check_overflow(handle);
 		mutex_unlock(&client->lock);
 		goto end;
 	}

From 1186a6ea75df00ec27b9cf2c5d0a5e4298739301 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 28 May 2018 13:31:13 +0200
Subject: [PATCH 419/519] KVM: irqfd: fix race between EPOLLHUP and
 irq_bypass_register_consumer

commit 9432a3175770e06cb83eada2d91fac90c977cb99 upstream.

A comment warning against this bug is there, but the code is not doing what
the comment says.  Therefore it is possible that an EPOLLHUP races against
irq_bypass_register_consumer.  The EPOLLHUP handler schedules irqfd_shutdown,
and if that runs soon enough, you get a use-after-free.

Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/eventfd.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 1203829316b2..f509cfd37db5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -405,11 +405,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	if (events & POLLIN)
 		schedule_work(&irqfd->inject);
 
-	/*
-	 * do not drop the file until the irqfd is fully initialized, otherwise
-	 * we might race against the POLLHUP
-	 */
-	fdput(f);
 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
 	irqfd->consumer.token = (void *)irqfd->eventfd;
 	irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
@@ -423,6 +418,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 #endif
 
 	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	/*
+	 * do not drop the file until the irqfd is fully initialized, otherwise
+	 * we might race against the POLLHUP
+	 */
+	fdput(f);
 	return 0;
 
 fail:

From a89f83823b97b6da1ecf7a51184b28822e78cc07 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Thu, 2 Aug 2018 00:03:40 -0400
Subject: [PATCH 420/519] ext4: fix spectre gadget in
 ext4_mb_regular_allocator()

commit 1a5d5e5d51e75a5bca67dadbcea8c841934b7b85 upstream.

'ac->ac_g_ex.fe_len' is a user-controlled value which is used in the
derivation of 'ac->ac_2order'. 'ac->ac_2order', in turn, is used to
index arrays which makes it a potential spectre gadget. Fix this by
sanitizing the value assigned to 'ac->ac2_order'.  This covers the
following accesses found with the help of smatch:

* fs/ext4/mballoc.c:1896 ext4_mb_simple_scan_group() warn: potential
  spectre issue 'grp->bb_counters' [w] (local cap)

* fs/ext4/mballoc.c:445 mb_find_buddy() warn: potential spectre issue
  'EXT4_SB(e4b->bd_sb)->s_mb_offsets' [r] (local cap)

* fs/ext4/mballoc.c:446 mb_find_buddy() warn: potential spectre issue
  'EXT4_SB(e4b->bd_sb)->s_mb_maxs' [r] (local cap)

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/mballoc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 75f79ff29ce0..828b4c080c38 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -26,6 +26,7 @@
 #include <linux/log2.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 #include <linux/backing-dev.h>
 #include <trace/events/ext4.h>
 
@@ -2144,7 +2145,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		 * This should tell if fe_len is exactly power of 2
 		 */
 		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
-			ac->ac_2order = i - 1;
+			ac->ac_2order = array_index_nospec(i - 1,
+							   sb->s_blocksize_bits + 2);
 	}
 
 	/* if stream allocation is enabled, use global goal */

From 49b3acf7ed1997af70ab95d95995eb2a1a6fdf93 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sun, 12 Aug 2018 16:38:03 -0400
Subject: [PATCH 421/519] parisc: Remove ordered stores from syscall.S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7797167ffde1f00446301cb22b37b7c03194cfaf upstream.

Now that we use a sync prior to releasing the locks in syscall.S, we don't need
the PA 2.0 ordered stores used to release some locks.  Using an ordered store,
potentially slows the release and subsequent code.

There are a number of other ordered stores and loads that serve no purpose.  I
have converted these to normal stores.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/kernel/syscall.S | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index f68eedc72484..dd44022c3ae3 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -627,12 +627,12 @@ cas_action:
 	stw	%r1, 4(%sr2,%r20)
 #endif
 	/* The load and store could fail */
-1:	ldw,ma	0(%r26), %r28
+1:	ldw	0(%r26), %r28
 	sub,<>	%r28, %r25, %r0
-2:	stw,ma	%r24, 0(%r26)
+2:	stw	%r24, 0(%r26)
 	/* Free lock */
 	sync
-	stw,ma	%r20, 0(%sr2,%r20)
+	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	/* Clear thread register indicator */
 	stw	%r0, 4(%sr2,%r20)
@@ -796,30 +796,30 @@ cas2_action:
 	ldo	1(%r0),%r28
 
 	/* 8bit CAS */
-13:	ldb,ma	0(%r26), %r29
+13:	ldb	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-14:	stb,ma	%r24, 0(%r26)
+14:	stb	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
 	nop
 
 	/* 16bit CAS */
-15:	ldh,ma	0(%r26), %r29
+15:	ldh	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-16:	sth,ma	%r24, 0(%r26)
+16:	sth	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
 	nop
 
 	/* 32bit CAS */
-17:	ldw,ma	0(%r26), %r29
+17:	ldw	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-18:	stw,ma	%r24, 0(%r26)
+18:	stw	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
@@ -827,10 +827,10 @@ cas2_action:
 
 	/* 64bit CAS */
 #ifdef CONFIG_64BIT
-19:	ldd,ma	0(%r26), %r29
+19:	ldd	0(%r26), %r29
 	sub,*=	%r29, %r25, %r0
 	b,n	cas2_end
-20:	std,ma	%r24, 0(%r26)
+20:	std	%r24, 0(%r26)
 	copy	%r0, %r28
 #else
 	/* Compare first word */
@@ -849,7 +849,7 @@ cas2_action:
 cas2_end:
 	/* Free lock */
 	sync
-	stw,ma	%r20, 0(%sr2,%r20)
+	stw	%r20, 0(%sr2,%r20)
 	/* Enable interrupts */
 	ssm	PSW_SM_I, %r0
 	/* Return to userspace, set no error */

From 3e6170d014af6d3e9608987a0dee6e7f01c074b3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Jun 2018 21:35:07 -0700
Subject: [PATCH 422/519] xfrm_user: prevent leaking 2 bytes of kernel memory

commit 45c180bc29babbedd6b8c01b975780ef44d9d09c upstream.

struct xfrm_userpolicy_type has two holes, so we should not
use C99 style initializer.

KMSAN report:

BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:140 [inline]
BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571
CPU: 1 PID: 4520 Comm: syz-executor841 Not tainted 4.17.0+ #5
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117
 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1211
 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253
 copyout lib/iov_iter.c:140 [inline]
 _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571
 copy_to_iter include/linux/uio.h:106 [inline]
 skb_copy_datagram_iter+0x422/0xfa0 net/core/datagram.c:431
 skb_copy_datagram_msg include/linux/skbuff.h:3268 [inline]
 netlink_recvmsg+0x6f1/0x1900 net/netlink/af_netlink.c:1959
 sock_recvmsg_nosec net/socket.c:802 [inline]
 sock_recvmsg+0x1d6/0x230 net/socket.c:809
 ___sys_recvmsg+0x3fe/0x810 net/socket.c:2279
 __sys_recvmmsg+0x58e/0xe30 net/socket.c:2391
 do_sys_recvmmsg+0x2a6/0x3e0 net/socket.c:2472
 __do_sys_recvmmsg net/socket.c:2485 [inline]
 __se_sys_recvmmsg net/socket.c:2481 [inline]
 __x64_sys_recvmmsg+0x15d/0x1c0 net/socket.c:2481
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x446ce9
RSP: 002b:00007fc307918db8 EFLAGS: 00000293 ORIG_RAX: 000000000000012b
RAX: ffffffffffffffda RBX: 00000000006dbc24 RCX: 0000000000446ce9
RDX: 000000000000000a RSI: 0000000020005040 RDI: 0000000000000003
RBP: 00000000006dbc20 R08: 0000000020004e40 R09: 0000000000000000
R10: 0000000040000000 R11: 0000000000000293 R12: 0000000000000000
R13: 00007ffc8d2df32f R14: 00007fc3079199c0 R15: 0000000000000001

Uninit was stored to memory at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_save_stack mm/kmsan/kmsan.c:294 [inline]
 kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685
 kmsan_memcpy_origins+0x11d/0x170 mm/kmsan/kmsan.c:527
 __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:413
 __nla_put lib/nlattr.c:569 [inline]
 nla_put+0x276/0x340 lib/nlattr.c:627
 copy_to_user_policy_type net/xfrm/xfrm_user.c:1678 [inline]
 dump_one_policy+0xbe1/0x1090 net/xfrm/xfrm_user.c:1708
 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013
 xfrm_dump_policy+0x1c0/0x2a0 net/xfrm/xfrm_user.c:1749
 netlink_dump+0x9b5/0x1550 net/netlink/af_netlink.c:2226
 __netlink_dump_start+0x1131/0x1270 net/netlink/af_netlink.c:2323
 netlink_dump_start include/linux/netlink.h:214 [inline]
 xfrm_user_rcv_msg+0x8a3/0x9b0 net/xfrm/xfrm_user.c:2577
 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448
 xfrm_netlink_rcv+0xb2/0xf0 net/xfrm/xfrm_user.c:2598
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Local variable description: ----upt.i@dump_one_policy
Variable was created at:
 dump_one_policy+0x78/0x1090 net/xfrm/xfrm_user.c:1689
 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013

Byte 130 of 137 is uninitialized
Memory access starts at ffff88019550407f

Fixes: c0144beaeca42 ("[XFRM] netlink: Use nla_put()/NLA_PUT() variantes")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/xfrm/xfrm_user.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 90270d7110a3..78c40bb681b9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1624,9 +1624,11 @@ static inline size_t userpolicy_type_attrsize(void)
 #ifdef CONFIG_XFRM_SUB_POLICY
 static int copy_to_user_policy_type(u8 type, struct sk_buff *skb)
 {
-	struct xfrm_userpolicy_type upt = {
-		.type = type,
-	};
+	struct xfrm_userpolicy_type upt;
+
+	/* Sadly there are two holes in struct xfrm_userpolicy_type */
+	memset(&upt, 0, sizeof(upt));
+	upt.type = type;
 
 	return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
 }

From 76cb5cc66114d2758796198fca7f3387a6f24b75 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 17 Jul 2018 21:03:15 +0200
Subject: [PATCH 423/519] netfilter: conntrack: dccp: treat SYNC/SYNCACK as
 invalid if no prior state

commit 6613b6173dee098997229caf1f3b961c49da75e6 upstream.

When first DCCP packet is SYNC or SYNCACK, we insert a new conntrack
that has an un-initialized timeout value, i.e. such entry could be
reaped at any time.

Mark them as INVALID and only ignore SYNC/SYNCACK when connection had
an old state.

Reported-by: syzbot+6f18401420df260e37ed@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_conntrack_proto_dccp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index fce1b1cca32d..99d0e9261a64 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -244,14 +244,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 		 * We currently ignore Sync packets
 		 *
 		 *	sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
-			sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+			sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
 		},
 		[DCCP_PKT_SYNCACK] = {
 		/*
 		 * We currently ignore SyncAck packets
 		 *
 		 *	sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
-			sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+			sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
 		},
 	},
 	[CT_DCCP_ROLE_SERVER] = {
@@ -372,14 +372,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 		 * We currently ignore Sync packets
 		 *
 		 *	sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
-			sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+			sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
 		},
 		[DCCP_PKT_SYNCACK] = {
 		/*
 		 * We currently ignore SyncAck packets
 		 *
 		 *	sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
-			sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+			sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
 		},
 	},
 };

From 62c4e369c9b98480a4b75b3a74a962a6b298120b Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 6 Aug 2018 10:38:34 -0400
Subject: [PATCH 424/519] packet: refine ring v3 block size test to hold one
 frame

commit 4576cd469d980317c4edd9173f8b694aa71ea3a3 upstream.

TPACKET_V3 stores variable length frames in fixed length blocks.
Blocks must be able to store a block header, optional private space
and at least one minimum sized frame.

Frames, even for a zero snaplen packet, store metadata headers and
optional reserved space.

In the block size bounds check, ensure that the frame of the
chosen configuration fits. This includes sockaddr_ll and optional
tp_reserve.

Syzbot was able to construct a ring with insuffient room for the
sockaddr_ll in the header of a zero-length frame, triggering an
out-of-bounds write in dev_parse_header.

Convert the comparison to less than, as zero is a valid snap len.
This matches the test for minimum tp_frame_size immediately below.

Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
Fixes: eb73190f4fbe ("net/packet: refine check for priv area size")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8de4e6620c1c..07668f152a3a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4176,6 +4176,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	}
 
 	if (req->tp_block_nr) {
+		unsigned int min_frame_size;
+
 		/* Sanity tests and some calculations */
 		err = -EBUSY;
 		if (unlikely(rb->pg_vec))
@@ -4198,12 +4200,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
 			goto out;
+		min_frame_size = po->tp_hdrlen + po->tp_reserve;
 		if (po->tp_version >= TPACKET_V3 &&
-		    req->tp_block_size <=
-		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr))
+		    req->tp_block_size <
+		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
 			goto out;
-		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
-					po->tp_reserve))
+		if (unlikely(req->tp_frame_size < min_frame_size))
 			goto out;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			goto out;

From 6d124ea608ac800f46100741f7ccd79791c061c8 Mon Sep 17 00:00:00 2001
From: Elad Raz <eladr@mellanox.com>
Date: Wed, 6 Jan 2016 13:01:04 +0100
Subject: [PATCH 425/519] bridge: Propagate vlan add failure to user
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 08474cc1e6ea71237cab7e4a651a623c9dea1084 upstream.

Disallow adding interfaces to a bridge when vlan filtering operation
failed. Send the failure code to the user.

Signed-off-by: Elad Raz <eladr@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: SZ Lin (林上智) <sz.lin@moxa.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/br_if.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 3400b1e47668..50e84e634dfe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -511,8 +511,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br_fdb_insert(br, p, dev->dev_addr, 0))
 		netdev_err(dev, "failed insert local address bridge forwarding table\n");
 
-	if (nbp_vlan_init(p))
+	err = nbp_vlan_init(p);
+	if (err) {
 		netdev_err(dev, "failed to initialize vlan filtering on this port\n");
+		goto err6;
+	}
 
 	spin_lock_bh(&br->lock);
 	changed_addr = br_stp_recalculate_bridge_id(br);
@@ -533,6 +536,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	return 0;
 
+err6:
+	list_del_rcu(&p->list);
+	br_fdb_delete_by_port(br, p, 0, 1);
+	nbp_update_port_count(br);
+	netdev_upper_dev_unlink(dev, br->dev);
+
 err5:
 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
 	netdev_rx_handler_unregister(dev);

From 400db6fe74317d64c920025ed4de2de7b3522230 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sun, 12 Aug 2018 16:31:17 -0400
Subject: [PATCH 426/519] parisc: Remove unnecessary barriers from spinlock.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 3b885ac1dc35b87a39ee176a6c7e2af9c789d8b8 upstream.

Now that mb() is an instruction barrier, it will slow performance if we issue
unnecessary barriers.

The spinlock defines have a number of unnecessary barriers.  The __ldcw()
define is both a hardware and compiler barrier.  The mb() barriers in the
routines using __ldcw() serve no purpose.

The only barrier needed is the one in arch_spin_unlock().  We need to ensure
all accesses are complete prior to releasing the lock.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/include/asm/spinlock.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 64f2992e439f..617efa845054 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -21,7 +21,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 {
 	volatile unsigned int *a;
 
-	mb();
 	a = __ldcw_align(x);
 	while (__ldcw(a) == 0)
 		while (*a == 0)
@@ -31,16 +30,15 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 				local_irq_disable();
 			} else
 				cpu_relax();
-	mb();
 }
 
 static inline void arch_spin_unlock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
-	mb();
+
 	a = __ldcw_align(x);
-	*a = 1;
 	mb();
+	*a = 1;
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *x)
@@ -48,10 +46,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
 	volatile unsigned int *a;
 	int ret;
 
-	mb();
 	a = __ldcw_align(x);
         ret = __ldcw(a) != 0;
-	mb();
 
 	return ret;
 }

From 8837163ebeba0ab5cd82d8eb284060e0e3cb4a35 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 19 Jul 2018 17:27:31 -0500
Subject: [PATCH 427/519] PCI: hotplug: Don't leak pci_slot on registration
 failure

commit 4ce6435820d1f1cc2c2788e232735eb244bcc8a3 upstream.

If addition of sysfs files fails on registration of a hotplug slot, the
struct pci_slot as well as the entry in the slot_list is leaked.  The
issue has been present since the hotplug core was introduced in 2002:
https://git.kernel.org/tglx/history/c/a8a2069f432c

Perhaps the idea was that even though sysfs addition fails, the slot
should still be usable.  But that's not how drivers use the interface,
they abort probe if a non-zero value is returned.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org # v2.4.15+
Cc: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/hotplug/pci_hotplug_core.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index d1fab97d6b01..6ce2a73fe0e4 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -457,8 +457,17 @@ int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus,
 	list_add(&slot->slot_list, &pci_hotplug_slot_list);
 
 	result = fs_add_slot(pci_slot);
+	if (result)
+		goto err_list_del;
+
 	kobject_uevent(&pci_slot->kobj, KOBJ_ADD);
 	dbg("Added slot %s to the list\n", name);
+	goto out;
+
+err_list_del:
+	list_del(&slot->slot_list);
+	pci_slot->hotplug = NULL;
+	pci_destroy_slot(pci_slot);
 out:
 	mutex_unlock(&pci_hp_mutex);
 	return result;

From cc7614a5e8ec4514aa27ee3874ad05a1057e644d Mon Sep 17 00:00:00 2001
From: Myron Stowe <myron.stowe@redhat.com>
Date: Mon, 13 Aug 2018 12:19:39 -0600
Subject: [PATCH 428/519] PCI: Skip MPS logic for Virtual Functions (VFs)

commit 3dbe97efe8bf450b183d6dee2305cbc032e6b8a4 upstream.

PCIe r4.0, sec 9.3.5.4, "Device Control Register", shows both
Max_Payload_Size (MPS) and Max_Read_request_Size (MRRS) to be 'RsvdP' for
VFs.  Just prior to the table it states:

  "PF and VF functionality is defined in Section 7.5.3.4 except where
   noted in Table 9-16.  For VF fields marked 'RsvdP', the PF setting
   applies to the VF."

All of which implies that with respect to Max_Payload_Size Supported
(MPSS), MPS, and MRRS values, we should not be paying any attention to the
VF's fields, but rather only to the PF's.  Only looking at the PF's fields
also logically makes sense as it's the sole physical interface to the PCIe
bus.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200527
Fixes: 27d868b5e6cf ("PCI: Set MPS to match upstream bridge")
Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org # 4.3+
Cc: Keith Busch <keith.busch@intel.com>
Cc: Sinan Kaya <okaya@kernel.org>
Cc: Dongdong Liu <liudongdong3@huawei.com>
Cc: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/probe.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 566897f24dee..5f040619393f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1338,6 +1338,10 @@ static void pci_configure_mps(struct pci_dev *dev)
 	if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge))
 		return;
 
+	/* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */
+	if (dev->is_virtfn)
+		return;
+
 	mps = pcie_get_mps(dev);
 	p_mps = pcie_get_mps(bridge);
 

From 131412f4f6f52b72c3a099c9cdac5d9c6034c76c Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 19 Jul 2018 17:27:32 -0500
Subject: [PATCH 429/519] PCI: pciehp: Fix use-after-free on unplug

commit 281e878eab191cce4259abbbf1a0322e3adae02c upstream.

When pciehp is unbound (e.g. on unplug of a Thunderbolt device), the
hotplug_slot struct is deregistered and thus freed before freeing the
IRQ.  The IRQ handler and the work items it schedules print the slot
name referenced from the freed structure in various informational and
debug log messages, each time resulting in a quadruple dereference of
freed pointers (hotplug_slot -> pci_slot -> kobject -> name).

At best the slot name is logged as "(null)", at worst kernel memory is
exposed in logs or the driver crashes:

  pciehp 0000:10:00.0:pcie204: Slot((null)): Card not present

An attacker may provoke the bug by unplugging multiple devices on a
Thunderbolt daisy chain at once.  Unplugging can also be simulated by
powering down slots via sysfs.  The bug is particularly easy to trigger
in poll mode.

It has been present since the driver's introduction in 2004:
https://git.kernel.org/tglx/history/c/c16b4b14d980

Fix by rearranging teardown such that the IRQ is freed first.  Run the
work items queued by the IRQ handler to completion before freeing the
hotplug_slot struct by draining the work queue from the ->release_slot
callback which is invoked by pci_hp_deregister().

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org # v2.6.4
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/hotplug/pciehp.h      | 1 +
 drivers/pci/hotplug/pciehp_core.c | 7 +++++++
 drivers/pci/hotplug/pciehp_hpc.c  | 5 ++---
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index cbe58480b474..6b0f7e0d7dbd 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -132,6 +132,7 @@ int pciehp_unconfigure_device(struct slot *p_slot);
 void pciehp_queue_pushbutton_work(struct work_struct *work);
 struct controller *pcie_init(struct pcie_device *dev);
 int pcie_init_notification(struct controller *ctrl);
+void pcie_shutdown_notification(struct controller *ctrl);
 int pciehp_enable_slot(struct slot *p_slot);
 int pciehp_disable_slot(struct slot *p_slot);
 void pcie_reenable_notification(struct controller *ctrl);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 8f6ded43760a..47cc3568514e 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -77,6 +77,12 @@ static int reset_slot		(struct hotplug_slot *slot, int probe);
  */
 static void release_slot(struct hotplug_slot *hotplug_slot)
 {
+	struct slot *slot = hotplug_slot->private;
+
+	/* queued work needs hotplug_slot name */
+	cancel_delayed_work(&slot->work);
+	drain_workqueue(slot->wq);
+
 	kfree(hotplug_slot->ops);
 	kfree(hotplug_slot->info);
 	kfree(hotplug_slot);
@@ -276,6 +282,7 @@ static void pciehp_remove(struct pcie_device *dev)
 {
 	struct controller *ctrl = get_service_data(dev);
 
+	pcie_shutdown_notification(ctrl);
 	cleanup_slot(ctrl);
 	pciehp_release_ctrl(ctrl);
 }
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 63c6c7fce3eb..cd982778a6b8 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -741,7 +741,7 @@ int pcie_init_notification(struct controller *ctrl)
 	return 0;
 }
 
-static void pcie_shutdown_notification(struct controller *ctrl)
+void pcie_shutdown_notification(struct controller *ctrl)
 {
 	if (ctrl->notification_enabled) {
 		pcie_disable_notification(ctrl);
@@ -776,7 +776,7 @@ static int pcie_init_slot(struct controller *ctrl)
 static void pcie_cleanup_slot(struct controller *ctrl)
 {
 	struct slot *slot = ctrl->slot;
-	cancel_delayed_work(&slot->work);
+
 	destroy_workqueue(slot->wq);
 	kfree(slot);
 }
@@ -853,7 +853,6 @@ struct controller *pcie_init(struct pcie_device *dev)
 
 void pciehp_release_ctrl(struct controller *ctrl)
 {
-	pcie_shutdown_notification(ctrl);
 	pcie_cleanup_slot(ctrl);
 	kfree(ctrl);
 }

From 6e57e6c67fd4b568b180fdbd5c14043d39fe6cda Mon Sep 17 00:00:00 2001
From: Esben Haabendal <eha@deif.com>
Date: Thu, 16 Aug 2018 10:43:12 +0200
Subject: [PATCH 430/519] i2c: imx: Fix race condition in dma read
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit bed4ff1ed4d8f2ef5007c5c6ae1b29c5677a3632 upstream.

This fixes a race condition, where the DMAEN bit ends up being set after
I2C slave has transmitted a byte following the dummy read.  When that
happens, an interrupt is generated instead, and no DMA request is generated
to kickstart the DMA read, and a timeout happens after DMA_TIMEOUT (1 sec).

Fixed by setting the DMAEN bit before the dummy read.

Signed-off-by: Esben Haabendal <eha@deif.com>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-imx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index a4abf7dc9576..cf1b57a054d0 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -677,9 +677,6 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
 	struct imx_i2c_dma *dma = i2c_imx->dma;
 	struct device *dev = &i2c_imx->adapter.dev;
 
-	temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
-	temp |= I2CR_DMAEN;
-	imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 
 	dma->chan_using = dma->chan_rx;
 	dma->dma_transfer_dir = DMA_DEV_TO_MEM;
@@ -792,6 +789,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
 	int i, result;
 	unsigned int temp;
 	int block_data = msgs->flags & I2C_M_RECV_LEN;
+	int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data;
 
 	dev_dbg(&i2c_imx->adapter.dev,
 		"<%s> write slave address: addr=0x%x\n",
@@ -818,12 +816,14 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
 	 */
 	if ((msgs->len - 1) || block_data)
 		temp &= ~I2CR_TXAK;
+	if (use_dma)
+		temp |= I2CR_DMAEN;
 	imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 	imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); /* dummy read */
 
 	dev_dbg(&i2c_imx->adapter.dev, "<%s> read data\n", __func__);
 
-	if (i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data)
+	if (use_dma)
 		return i2c_imx_dma_read(i2c_imx, msgs, is_lastmsg);
 
 	/* read data */

From 712254045c02edf3dc21714337a23bf361d0c5ee Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 21 Aug 2018 21:59:37 -0700
Subject: [PATCH 431/519] reiserfs: fix broken xattr handling (heap corruption,
 bad retval)

commit a13f085d111e90469faf2d9965eb39b11c114d7e upstream.

This fixes the following issues:

- When a buffer size is supplied to reiserfs_listxattr() such that each
  individual name fits, but the concatenation of all names doesn't fit,
  reiserfs_listxattr() overflows the supplied buffer.  This leads to a
  kernel heap overflow (verified using KASAN) followed by an out-of-bounds
  usercopy and is therefore a security bug.

- When a buffer size is supplied to reiserfs_listxattr() such that a
  name doesn't fit, -ERANGE should be returned.  But reiserfs instead just
  truncates the list of names; I have verified that if the only xattr on a
  file has a longer name than the supplied buffer length, listxattr()
  incorrectly returns zero.

With my patch applied, -ERANGE is returned in both cases and the memory
corruption doesn't happen anymore.

Credit for making me clean this code up a bit goes to Al Viro, who pointed
out that the ->actor calling convention is suboptimal and should be
changed.

Link: http://lkml.kernel.org/r/20180802151539.5373-1-jannh@google.com
Fixes: 48b32a3553a5 ("reiserfs: use generic xattr handlers")
Signed-off-by: Jann Horn <jannh@google.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/reiserfs/xattr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index a8dbc93e45eb..8b32fdaad468 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -791,8 +791,10 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
 			size = handler->list(handler, b->dentry,
 					     b->buf + b->pos, b->size, name,
 					     namelen);
-			if (size > b->size)
+			if (b->pos + size > b->size) {
+				b->pos = -ERANGE;
 				return -ERANGE;
+			}
 		} else {
 			size = handler->list(handler, b->dentry,
 					     NULL, 0, name, namelen);

From 0c73169690eb1d7d6f72a128a010bd84343e503a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 24 Aug 2018 13:27:02 +0200
Subject: [PATCH 432/519] Linux 4.4.152

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 04199cf99dd5..523b0d4354fb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 151
+SUBLEVEL = 152
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From adaba23ccd7d1625942f2c27612d2b416c87e011 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 25 Aug 2018 06:50:15 -0700
Subject: [PATCH 433/519] x86/mm/pat: Fix L1TF stable backport for CPA

Patch for stable only to fix boot resets caused by the L1TF patches.

Stable trees reverted the following patch

Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers"

    This reverts commit 87e2bd898d3a79a8c609f183180adac47879a2a4 which is
    commit edc3b9129cecd0f0857112136f5b8b1bc1d45918 upstream.

but the L1TF patch backported here

   x86/mm/pat: Make set_memory_np() L1TF safe

    commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream

    set_memory_np() is used to mark kernel mappings not present, but it has
    it's own open coded mechanism which does not have the L1TF protection of
    inverting the address bits.

assumed that cpa->pfn contains a PFN. With the above patch reverted
it does not, which causes the PMD to be set to an incorrect address
shifted by 12 bits, which can cause early boot reset on some
systems, like an Apollo Lake embedded system.

Convert the address to a PFN before passing it to pmd_pfn()

Thanks to Bernhard for bisecting and testing.

Cc: stable@vger.kernel.org # 4.4 and 4.9
Reported-by: Bernhard Kaindl <bernhard.kaindl@thalesgroup.com>
Tested-by: Bernhard Kaindl <bernhard.kaindl@thalesgroup.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pageattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 27610c2d1821..1007fa80f5a6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1006,7 +1006,7 @@ static int populate_pmd(struct cpa_data *cpa,
 
 		pmd = pmd_offset(pud, start);
 
-		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
+		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn >> PAGE_SHIFT,
 					canon_pgprot(pmd_pgprot))));
 
 		start	  += PMD_SIZE;

From f9866720724db8a163cf305fc907cdab0b38fa09 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 24 Aug 2017 10:50:29 -0700
Subject: [PATCH 434/519] x86/mm: Fix use-after-free of ldt_struct

commit ccd5b3235180eef3cfec337df1c8554ab151b5cc upstream.

The following commit:

  39a0526fb3f7 ("x86/mm: Factor out LDT init from context init")

renamed init_new_context() to init_new_context_ldt() and added a new
init_new_context() which calls init_new_context_ldt().  However, the
error code of init_new_context_ldt() was ignored.  Consequently, if a
memory allocation in alloc_ldt_struct() failed during a fork(), the
->context.ldt of the new task remained the same as that of the old task
(due to the memcpy() in dup_mm()).  ldt_struct's are not intended to be
shared, so a use-after-free occurred after one task exited.

Fix the bug by making init_new_context() pass through the error code of
init_new_context_ldt().

This bug was found by syzkaller, which encountered the following splat:

    BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116
    Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710

    CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Call Trace:
     __dump_stack lib/dump_stack.c:16 [inline]
     dump_stack+0x194/0x257 lib/dump_stack.c:52
     print_address_description+0x73/0x250 mm/kasan/report.c:252
     kasan_report_error mm/kasan/report.c:351 [inline]
     kasan_report+0x24e/0x340 mm/kasan/report.c:409
     __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429
     free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116
     free_ldt_struct arch/x86/kernel/ldt.c:173 [inline]
     destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171
     destroy_context arch/x86/include/asm/mmu_context.h:157 [inline]
     __mmdrop+0xe9/0x530 kernel/fork.c:889
     mmdrop include/linux/sched/mm.h:42 [inline]
     exec_mmap fs/exec.c:1061 [inline]
     flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291
     load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855
     search_binary_handler+0x142/0x6b0 fs/exec.c:1652
     exec_binprm fs/exec.c:1694 [inline]
     do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816
     do_execve+0x31/0x40 fs/exec.c:1860
     call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100
     ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431

    Allocated by task 3700:
     save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
     save_stack+0x43/0xd0 mm/kasan/kasan.c:447
     set_track mm/kasan/kasan.c:459 [inline]
     kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
     kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627
     kmalloc include/linux/slab.h:493 [inline]
     alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67
     write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277
     sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307
     entry_SYSCALL_64_fastpath+0x1f/0xbe

    Freed by task 3700:
     save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
     save_stack+0x43/0xd0 mm/kasan/kasan.c:447
     set_track mm/kasan/kasan.c:459 [inline]
     kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
     __cache_free mm/slab.c:3503 [inline]
     kfree+0xca/0x250 mm/slab.c:3820
     free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121
     free_ldt_struct arch/x86/kernel/ldt.c:173 [inline]
     destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171
     destroy_context arch/x86/include/asm/mmu_context.h:157 [inline]
     __mmdrop+0xe9/0x530 kernel/fork.c:889
     mmdrop include/linux/sched/mm.h:42 [inline]
     __mmput kernel/fork.c:916 [inline]
     mmput+0x541/0x6e0 kernel/fork.c:927
     copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931
     copy_process kernel/fork.c:1546 [inline]
     _do_fork+0x1ef/0xfb0 kernel/fork.c:2025
     SYSC_clone kernel/fork.c:2135 [inline]
     SyS_clone+0x37/0x50 kernel/fork.c:2129
     do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287
     return_from_SYSCALL_64+0x0/0x7a

Here is a C reproducer:

    #include <asm/ldt.h>
    #include <pthread.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <sys/syscall.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static void *fork_thread(void *_arg)
    {
        fork();
    }

    int main(void)
    {
        struct user_desc desc = { .entry_number = 8191 };

        syscall(__NR_modify_ldt, 1, &desc, sizeof(desc));

        for (;;) {
            if (fork() == 0) {
                pthread_t t;

                srand(getpid());
                pthread_create(&t, NULL, fork_thread, NULL);
                usleep(rand() % 10000);
                syscall(__NR_exit_group, 0);
            }
            wait(NULL);
        }
    }

Note: the reproducer takes advantage of the fact that alloc_ldt_struct()
may use vmalloc() to allocate a large ->entries array, and after
commit:

  5d17a73a2ebe ("vmalloc: back off when the current task is killed")

it is possible for userspace to fail a task's vmalloc() by
sending a fatal signal, e.g. via exit_group().  It would be more
difficult to reproduce this bug on kernels without that commit.

This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: <stable@vger.kernel.org> [v4.6+]
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init")
Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/mmu_context.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index effc12767cbf..d8d19fe99e45 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -109,8 +109,7 @@ static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
-	init_new_context_ldt(tsk, mm);
-	return 0;
+	return init_new_context_ldt(tsk, mm);
 }
 static inline void destroy_context(struct mm_struct *mm)
 {

From d5e678942de33a5d8545a8b7c825eb93b57be1a9 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Mon, 22 Feb 2016 09:28:34 -0500
Subject: [PATCH 435/519] ovl: Ensure upper filesystem supports d_type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 45aebeaf4f67468f76bedf62923a576a519a9b68 upstream.

In some instances xfs has been created with ftype=0 and there if a file
on lower fs is removed, overlay leaves a whiteout in upper fs but that
whiteout does not get filtered out and is visible to overlayfs users.

And reason it does not get filtered out because upper filesystem does
not report file type of whiteout as DT_CHR during iterate_dir().

So it seems to be a requirement that upper filesystem support d_type for
overlayfs to work properly. Do this check during mount and fail if d_type
is not supported.

Suggested-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: SZ Lin (林上智) <sz.lin@moxa.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/overlayfs/overlayfs.h |  1 +
 fs/overlayfs/readdir.c   | 37 +++++++++++++++++++++++++++++++++++++
 fs/overlayfs/super.c     | 15 +++++++++++++++
 3 files changed, 53 insertions(+)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index c319d5eaabcf..28316b292b8a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -163,6 +163,7 @@ extern const struct file_operations ovl_dir_operations;
 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
 void ovl_cache_free(struct list_head *list);
+int ovl_check_d_type_supported(struct path *realpath);
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 299a6e1d6b77..0c59955c4653 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -43,6 +43,7 @@ struct ovl_readdir_data {
 	struct ovl_cache_entry *first_maybe_whiteout;
 	int count;
 	int err;
+	bool d_type_supported;
 };
 
 struct ovl_dir_file {
@@ -581,3 +582,39 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 	}
 	mutex_unlock(&upper->d_inode->i_mutex);
 }
+
+static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
+	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
+		return 0;
+
+	if (d_type != DT_UNKNOWN)
+		rdd->d_type_supported = true;
+
+	return 0;
+}
+
+/*
+ * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
+ * if error is encountered.
+ */
+int ovl_check_d_type_supported(struct path *realpath)
+{
+	int err;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_check_d_type,
+		.d_type_supported = false,
+	};
+
+	err = ovl_dir_read(realpath, &rdd);
+	if (err)
+		return err;
+
+	return rdd.d_type_supported;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index d70208c0de84..2de4e3a7d6e7 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1054,6 +1054,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			sb->s_flags |= MS_RDONLY;
 			ufs->workdir = NULL;
 		}
+
+		/*
+		 * Upper should support d_type, else whiteouts are visible.
+		 * Given workdir and upper are on same fs, we can do
+		 * iterate_dir() on workdir.
+		 */
+		err = ovl_check_d_type_supported(&workpath);
+		if (err < 0)
+			goto out_put_workdir;
+
+		if (!err) {
+			pr_err("overlayfs: upper fs needs to support d_type.\n");
+			err = -EINVAL;
+			goto out_put_workdir;
+		}
 	}
 
 	err = -ENOMEM;

From 0f9a6d88cd9f3b16a86639bd652202fe27096b18 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 20 May 2016 09:04:26 -0400
Subject: [PATCH 436/519] ovl: Do d_type check only if work dir creation was
 successful
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 21765194cecf2e4514ad75244df459f188140a0f upstream.

d_type check requires successful creation of workdir as iterates
through work dir and expects work dir to be present in it. If that's
not the case, this check will always return d_type not supported even
if underlying filesystem might be supporting it.

So don't do this check if work dir creation failed in previous step.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: SZ Lin (林上智) <sz.lin@moxa.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/overlayfs/super.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 2de4e3a7d6e7..fd21c5f74fba 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1058,16 +1058,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		/*
 		 * Upper should support d_type, else whiteouts are visible.
 		 * Given workdir and upper are on same fs, we can do
-		 * iterate_dir() on workdir.
+		 * iterate_dir() on workdir. This check requires successful
+		 * creation of workdir in previous step.
 		 */
-		err = ovl_check_d_type_supported(&workpath);
-		if (err < 0)
-			goto out_put_workdir;
+		if (ufs->workdir) {
+			err = ovl_check_d_type_supported(&workpath);
+			if (err < 0)
+				goto out_put_workdir;
 
-		if (!err) {
-			pr_err("overlayfs: upper fs needs to support d_type.\n");
-			err = -EINVAL;
-			goto out_put_workdir;
+			if (!err) {
+				pr_err("overlayfs: upper fs needs to support d_type.\n");
+				err = -EINVAL;
+				goto out_put_workdir;
+			}
 		}
 	}
 

From 7eaa995c75bd23b57163541c3285a2c984018b7e Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 1 Jul 2016 10:02:44 -0400
Subject: [PATCH 437/519] ovl: warn instead of error if d_type is not supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit e7c0b5991dd1be7b6f6dc2b54a15a0f47b64b007 upstream.

overlay needs underlying fs to support d_type. Recently I put in a
patch in to detect this condition and started failing mount if
underlying fs did not support d_type.

But this breaks existing configurations over kernel upgrade. Those who
are running docker (partially broken configuration) with xfs not
supporting d_type, are surprised that after kernel upgrade docker does
not run anymore.

https://github.com/docker/docker/issues/22937#issuecomment-229881315

So instead of erroring out, detect broken configuration and warn
about it. This should allow existing docker setups to continue
working after kernel upgrade.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 45aebeaf4f67 ("ovl: Ensure upper filesystem supports d_type")
Cc: <stable@vger.kernel.org> 4.6
Signed-off-by: SZ Lin (林上智) <sz.lin@moxa.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/overlayfs/super.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fd21c5f74fba..0035cb80ecd1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1066,11 +1066,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			if (err < 0)
 				goto out_put_workdir;
 
-			if (!err) {
-				pr_err("overlayfs: upper fs needs to support d_type.\n");
-				err = -EINVAL;
-				goto out_put_workdir;
-			}
+			/*
+			 * We allowed this configuration and don't want to
+			 * break users over kernel upgrade. So warn instead
+			 * of erroring out.
+			 */
+			if (!err)
+				pr_warn("overlayfs: upper fs needs to support d_type.\n");
 		}
 	}
 

From 577189c37a844243359afce1c3c94418259fe696 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 28 Aug 2018 07:23:44 +0200
Subject: [PATCH 438/519] Linux 4.4.153

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 523b0d4354fb..208a813be615 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 152
+SUBLEVEL = 153
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 21a24b5db1858867ffafb5ca221b352a232e15eb Mon Sep 17 00:00:00 2001
From: Ethan Zhao <ethan.zhao@oracle.com>
Date: Mon, 4 Sep 2017 13:59:34 +0800
Subject: [PATCH 439/519] sched/sysctl: Check user input value of
 sysctl_sched_time_avg

commit 5ccba44ba118a5000cccc50076b0344632459779 upstream.

System will hang if user set sysctl_sched_time_avg to 0:

  [root@XXX ~]# sysctl kernel.sched_time_avg_ms=0

  Stack traceback for pid 0
  0xffff883f6406c600 0 0 1 3 R 0xffff883f6406cf50 *swapper/3
  ffff883f7ccc3ae8 0000000000000018 ffffffff810c4dd0 0000000000000000
  0000000000017800 ffff883f7ccc3d78 0000000000000003 ffff883f7ccc3bf8
  ffffffff810c4fc9 ffff883f7ccc3c08 00000000810c5043 ffff883f7ccc3c08
  Call Trace:
  <IRQ> [<ffffffff810c4dd0>] ? update_group_capacity+0x110/0x200
  [<ffffffff810c4fc9>] ? update_sd_lb_stats+0x109/0x600
  [<ffffffff810c5507>] ? find_busiest_group+0x47/0x530
  [<ffffffff810c5b84>] ? load_balance+0x194/0x900
  [<ffffffff810ad5ca>] ? update_rq_clock.part.83+0x1a/0xe0
  [<ffffffff810c6d42>] ? rebalance_domains+0x152/0x290
  [<ffffffff810c6f5c>] ? run_rebalance_domains+0xdc/0x1d0
  [<ffffffff8108a75b>] ? __do_softirq+0xfb/0x320
  [<ffffffff8108ac85>] ? irq_exit+0x125/0x130
  [<ffffffff810b3a17>] ? scheduler_ipi+0x97/0x160
  [<ffffffff81052709>] ? smp_reschedule_interrupt+0x29/0x30
  [<ffffffff8173a1be>] ? reschedule_interrupt+0x6e/0x80
   <EOI> [<ffffffff815bc83c>] ? cpuidle_enter_state+0xcc/0x230
  [<ffffffff815bc80c>] ? cpuidle_enter_state+0x9c/0x230
  [<ffffffff815bc9d7>] ? cpuidle_enter+0x17/0x20
  [<ffffffff810cd6dc>] ? cpu_startup_entry+0x38c/0x420
  [<ffffffff81053373>] ? start_secondary+0x173/0x1e0

Because divide-by-zero error happens in function:

update_group_capacity()
  update_cpu_capacity()
    scale_rt_capacity()
     {
          ...
          total = sched_avg_period() + delta;
          used = div_u64(avg, total);
          ...
     }

To fix this issue, check user input value of sysctl_sched_time_avg, keep
it unchanged when hitting invalid input, and set the minimum limit of
sysctl_sched_time_avg to 1 ms.

Reported-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
Signed-off-by: Ethan Zhao <ethan.zhao@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: efault@gmx.de
Cc: ethan.kernel@gmail.com
Cc: keescook@chromium.org
Cc: mcgrof@kernel.org
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/1504504774-18253-1-git-send-email-ethan.zhao@oracle.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Steve Muckle <smuckle@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sysctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 17c59e78661b..66100d1bc3f7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -342,7 +342,8 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_sched_time_avg,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "sched_shares_window_ns",

From 8ed0ff83f5fd0ca412a52bd71af332598c01ca46 Mon Sep 17 00:00:00 2001
From: "yujuan.qi" <yujuan.qi@mediatek.com>
Date: Mon, 31 Jul 2017 11:23:01 +0800
Subject: [PATCH 440/519] Cipso: cipso_v4_optptr enter infinite loop

commit 40413955ee265a5e42f710940ec78f5450d49149 upstream.

in for(),if((optlen > 0) && (optptr[1] == 0)), enter infinite loop.

Test: receive a packet which the ip length > 20 and the first byte of ip option is 0, produce this issue

Signed-off-by: yujuan.qi <yujuan.qi@mediatek.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/cipso_ipv4.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 5f3b81941a6f..5169b9b36b6a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1593,9 +1593,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
 	int taglen;
 
 	for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
-		if (optptr[0] == IPOPT_CIPSO)
+		switch (optptr[0]) {
+		case IPOPT_CIPSO:
 			return optptr;
-		taglen = optptr[1];
+		case IPOPT_END:
+			return NULL;
+		case IPOPT_NOOP:
+			taglen = 1;
+			break;
+		default:
+			taglen = optptr[1];
+		}
 		optlen -= taglen;
 		optptr += taglen;
 	}

From 1b8e283f8a48e1a5d34f26dc3e5dcabf9607f503 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Thu, 7 Jun 2018 10:11:02 +0300
Subject: [PATCH 441/519] vti6: fix PMTU caching and reporting on xmit

[ Upstream commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ]

When setting the skb->dst before doing the MTU check, the route PMTU
caching and reporting is done on the new dst which is about to be
released.

Instead, PMTU handling should be done using the original dst.

This is aligned with IPv4 VTI.

Fixes: ccd740cbc6 ("vti6: Add pmtu handling to vti6_xmit.")
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_vti.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 40bb7a5e6d47..6aca9a6b2303 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -469,10 +469,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 		goto tx_err_dst_release;
 	}
 
-	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
-	skb_dst_set(skb, dst);
-	skb->dev = skb_dst(skb)->dev;
-
 	mtu = dst_mtu(dst);
 	if (!skb->ignore_df && skb->len > mtu) {
 		skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
@@ -487,9 +483,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 				  htonl(mtu));
 		}
 
-		return -EMSGSIZE;
+		err = -EMSGSIZE;
+		goto tx_err_dst_release;
 	}
 
+	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+	skb_dst_set(skb, dst);
+	skb->dev = skb_dst(skb)->dev;
+
 	err = dst_output(t->net, skb->sk, skb);
 	if (net_xmit_eval(err) == 0) {
 		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);

From dbcad9a65dbb5f3efa1391f8e4b67303669f8cc0 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 21 Jun 2018 09:30:47 +0300
Subject: [PATCH 442/519] xfrm: fix missing dst_release() after policy blocking
 lbcast and multicast

[ Upstream commit 8cc88773855f988d6a3bbf102bbd9dd9c828eb81 ]

Fix missing dst_release() when local broadcast or multicast traffic is
xfrm policy blocked.

For IPv4 this results to dst leak: ip_route_output_flow() allocates
dst_entry via __ip_route_output_key() and passes it to
xfrm_lookup_route(). xfrm_lookup returns ERR_PTR(-EPERM) that is
propagated. The dst that was allocated is never released.

IPv4 local broadcast testcase:
 ping -b 192.168.1.255 &
 sleep 1
 ip xfrm policy add src 0.0.0.0/0 dst 192.168.1.255/32 dir out action block

IPv4 multicast testcase:
 ping 224.0.0.1 &
 sleep 1
 ip xfrm policy add src 0.0.0.0/0 dst 224.0.0.1/32 dir out action block

For IPv6 the missing dst_release() causes trouble e.g. when used in netns:
 ip netns add TEST
 ip netns exec TEST ip link set lo up
 ip link add dummy0 type dummy
 ip link set dev dummy0 netns TEST
 ip netns exec TEST ip addr add fd00::1111 dev dummy0
 ip netns exec TEST ip link set dummy0 up
 ip netns exec TEST ping -6 -c 5 ff02::1%dummy0 &
 sleep 1
 ip netns exec TEST ip xfrm policy add src ::/0 dst ff02::1 dir out action block
 wait
 ip netns del TEST

After netns deletion we see:
[  258.239097] unregister_netdevice: waiting for lo to become free. Usage count = 2
[  268.279061] unregister_netdevice: waiting for lo to become free. Usage count = 2
[  278.367018] unregister_netdevice: waiting for lo to become free. Usage count = 2
[  288.375259] unregister_netdevice: waiting for lo to become free. Usage count = 2

Fixes: ac37e2515c1a ("xfrm: release dst_orig in case of error in xfrm_lookup()")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/xfrm/xfrm_policy.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f9a13b67df5e..e9eecf6f0bff 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2326,6 +2326,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
 		return make_blackhole(net, dst_orig->ops->family, dst_orig);
 
+	if (IS_ERR(dst))
+		dst_release(dst_orig);
+
 	return dst;
 }
 EXPORT_SYMBOL(xfrm_lookup_route);

From d9c00c89596a239996dd8659b7f4db5674e51a98 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 14:00:07 +0200
Subject: [PATCH 443/519] xfrm: free skb if nlsk pointer is NULL

[ Upstream commit 86126b77dcd551ce223e7293bb55854e3df05646 ]

nlmsg_multicast() always frees the skb, so in case we cannot call
it we must do that ourselves.

Fixes: 21ee543edc0dea ("xfrm: fix race between netns cleanup and state expire notification")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/xfrm/xfrm_user.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 78c40bb681b9..a9b4491a3cc4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -980,10 +980,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
 {
 	struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
 
-	if (nlsk)
-		return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
-	else
-		return -1;
+	if (!nlsk) {
+		kfree_skb(skb);
+		return -EPIPE;
+	}
+
+	return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
 }
 
 static inline size_t xfrm_spdinfo_msgsize(void)

From 0dc742d907f8d05f188a284433cf9fb565a61f6d Mon Sep 17 00:00:00 2001
From: "mpubbise@codeaurora.org" <mpubbise@codeaurora.org>
Date: Mon, 2 Jul 2018 15:40:14 +0530
Subject: [PATCH 444/519] mac80211: add stations tied to AP_VLANs during hw
 reconfig

[ Upstream commit 19103a4bfb42f320395daa5616ece3e89e759d63 ]

As part of hw reconfig, only stations linked to AP interfaces are added
back to the driver ignoring those which are tied to AP_VLAN interfaces.

It is true that there could be stations tied to the AP_VLAN interface while
serving 4addr clients or when using AP_VLAN for VLAN operations; we should
be adding these stations back to the driver as part of hw reconfig, failing
to do so can cause functional issues.

In the case of ath10k driver, the following errors were observed.

ath10k_pci : failed to install key for non-existent peer XX:XX:XX:XX:XX:XX
Workqueue: events_freezable ieee80211_restart_work [mac80211]
(unwind_backtrace) from (show_stack+0x10/0x14)
(show_stack) (dump_stack+0x80/0xa0)
(dump_stack) (warn_slowpath_common+0x68/0x8c)
(warn_slowpath_common) (warn_slowpath_null+0x18/0x20)
(warn_slowpath_null) (ieee80211_enable_keys+0x88/0x154 [mac80211])
(ieee80211_enable_keys) (ieee80211_reconfig+0xc90/0x19c8 [mac80211])
(ieee80211_reconfig]) (ieee80211_restart_work+0x8c/0xa0 [mac80211])
(ieee80211_restart_work) (process_one_work+0x284/0x488)
(process_one_work) (worker_thread+0x228/0x360)
(worker_thread) (kthread+0xd8/0xec)
(kthread) (ret_from_fork+0x14/0x24)

Also while bringing down the AP VAP, WARN_ONs and errors related to peer
removal were observed.

ath10k_pci : failed to clear all peer wep keys for vdev 0: -2
ath10k_pci : failed to disassociate station: 8c:fd:f0:0a:8c:f5 vdev 0: -2
(unwind_backtrace) (show_stack+0x10/0x14)
(show_stack) (dump_stack+0x80/0xa0)
(dump_stack) (warn_slowpath_common+0x68/0x8c)
(warn_slowpath_common) (warn_slowpath_null+0x18/0x20)
(warn_slowpath_null) (sta_set_sinfo+0xb98/0xc9c [mac80211])
(sta_set_sinfo [mac80211]) (__sta_info_flush+0xf0/0x134 [mac80211])
(__sta_info_flush [mac80211]) (ieee80211_stop_ap+0xe8/0x390 [mac80211])
(ieee80211_stop_ap [mac80211]) (__cfg80211_stop_ap+0xe0/0x3dc [cfg80211])
(__cfg80211_stop_ap [cfg80211]) (cfg80211_stop_ap+0x30/0x44 [cfg80211])
(cfg80211_stop_ap [cfg80211]) (genl_rcv_msg+0x274/0x30c)
(genl_rcv_msg) (netlink_rcv_skb+0x58/0xac)
(netlink_rcv_skb) (genl_rcv+0x20/0x34)
(genl_rcv) (netlink_unicast+0x11c/0x204)
(netlink_unicast) (netlink_sendmsg+0x30c/0x370)
(netlink_sendmsg) (sock_sendmsg+0x70/0x84)
(sock_sendmsg) (___sys_sendmsg.part.3+0x188/0x228)
(___sys_sendmsg.part.3) (__sys_sendmsg+0x4c/0x70)
(__sys_sendmsg) (ret_fast_syscall+0x0/0x44)

These issues got fixed by adding the stations which are
tied to AP_VLANs back to the driver.

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/util.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ec26a84b00e2..2214c77d4172 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2006,7 +2006,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		if (!sta->uploaded)
 			continue;
 
-		if (sta->sdata->vif.type != NL80211_IFTYPE_AP)
+		if (sta->sdata->vif.type != NL80211_IFTYPE_AP &&
+		    sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
 			continue;
 
 		for (state = IEEE80211_STA_NOTEXIST;

From 8d437bd6db6563b58e861361dc8070558cac6e89 Mon Sep 17 00:00:00 2001
From: Bernd Edlinger <bernd.edlinger@hotmail.de>
Date: Sun, 8 Jul 2018 09:57:22 +0000
Subject: [PATCH 445/519] nl80211: Add a missing break in parse_station_flags

[ Upstream commit 5cf3006cc81d9aa09a10aa781fc065546b12919d ]

I was looking at usually suppressed gcc warnings,
[-Wimplicit-fallthrough=] in this case:

The code definitely looks like a break is missing here.
However I am not able to test the NL80211_IFTYPE_MESH_POINT,
nor do I actually know what might be :)
So please use this patch with caution and only if you are
able to do some testing.

Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
[johannes: looks obvious enough to apply as is, interesting
 though that it never seems to have been a problem]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/nl80211.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b0b58d1565c2..b07fd8b8b50c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3578,6 +3578,7 @@ static int parse_station_flags(struct genl_info *info,
 		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
 					 BIT(NL80211_STA_FLAG_MFP) |
 					 BIT(NL80211_STA_FLAG_AUTHORIZED);
+		break;
 	default:
 		return -EINVAL;
 	}

From f9ddeba81ccdeb6eb85e47929b81cae6dfc3cdfb Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Tue, 3 Jul 2018 12:56:03 -0400
Subject: [PATCH 446/519] drm/bridge: adv7511: Reset registers on hotplug

[ Upstream commit 5f3417569165a8ee57654217f73e0160312f409c ]

The bridge loses its hw state when the cable is unplugged. If we detect
this case in the hpd handler, reset its state.

Reported-by: Rob Clark <robdclark@gmail.com>
Tested-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20180703165648.120401-1-seanpaul@chromium.org
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i2c/adv7511.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c
index dba5c0ea0827..c7c243e9b808 100644
--- a/drivers/gpu/drm/i2c/adv7511.c
+++ b/drivers/gpu/drm/i2c/adv7511.c
@@ -450,6 +450,18 @@ static void adv7511_hpd_work(struct work_struct *work)
 	else
 		status = connector_status_disconnected;
 
+	/*
+	 * The bridge resets its registers on unplug. So when we get a plug
+	 * event and we're already supposed to be powered, cycle the bridge to
+	 * restore its state.
+	 */
+	if (status == connector_status_connected &&
+	    adv7511->connector.status == connector_status_disconnected &&
+	    adv7511->powered) {
+		regcache_mark_dirty(adv7511->regmap);
+		adv7511_power_on(adv7511);
+	}
+
 	if (adv7511->connector.status != status) {
 		adv7511->connector.status = status;
 		drm_kms_helper_hotplug_event(adv7511->connector.dev);

From 982d6c0f1d449492e13ec60f9db8e1acaf7f9e64 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Wed, 11 Jul 2018 22:09:52 +0530
Subject: [PATCH 447/519] scsi: libiscsi: fix possible NULL pointer dereference
 in case of TMF

[ Upstream commit a17037e7d59075053b522048742a08ac9500bde8 ]

In iscsi_check_tmf_restrictions() task->hdr is dereferenced to print the
opcode, it is possible that task->hdr is NULL.

There are two cases based on opcode argument:

1. ISCSI_OP_SCSI_CMD - In this case alloc_pdu() is called
after iscsi_check_tmf_restrictions()

iscsi_prep_scsi_cmd_pdu() -> iscsi_check_tmf_restrictions() -> alloc_pdu().

Transport drivers allocate memory for iSCSI hdr in alloc_pdu() and assign
it to task->hdr. In case of TMF task->hdr will be NULL resulting in NULL
pointer dereference.

2. ISCSI_OP_SCSI_DATA_OUT - In this case transport driver can free the
memory for iSCSI hdr after transmitting the pdu so task->hdr can be NULL or
invalid.

This patch fixes this issue by removing task->hdr->opcode from the printk
statement.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/libiscsi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 9f0b00c38658..a74f8fbefd33 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -283,11 +283,11 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode)
 		 */
 		if (opcode != ISCSI_OP_SCSI_DATA_OUT) {
 			iscsi_conn_printk(KERN_INFO, conn,
-					  "task [op %x/%x itt "
+					  "task [op %x itt "
 					  "0x%x/0x%x] "
 					  "rejected.\n",
-					  task->hdr->opcode, opcode,
-					  task->itt, task->hdr_itt);
+					  opcode, task->itt,
+					  task->hdr_itt);
 			return -EACCES;
 		}
 		/*
@@ -296,10 +296,10 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode)
 		 */
 		if (conn->session->fast_abort) {
 			iscsi_conn_printk(KERN_INFO, conn,
-					  "task [op %x/%x itt "
+					  "task [op %x itt "
 					  "0x%x/0x%x] fast abort.\n",
-					  task->hdr->opcode, opcode,
-					  task->itt, task->hdr_itt);
+					  opcode, task->itt,
+					  task->hdr_itt);
 			return -EACCES;
 		}
 		break;

From 0f14e3a837623b5634aae4f2d56cf7bbb9accca5 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Wed, 11 Apr 2018 17:31:35 +0200
Subject: [PATCH 448/519] drm/imx: imx-ldb: disable LDB on driver bind

[ Upstream commit b58262396fabd43dc869b576e3defdd23b32fe94 ]

The LVDS signal integrity is only guaranteed when the correct enable
sequence (first IPU DI, then LDB) is used. If the LDB display output was
active before the imx-drm driver is loaded (like when a bootsplash was
active) the DI will be disabled by the full IPU reset we do when loading
the driver. The LDB control registers are not part of the IPU range and
thus will remain unchanged.

This leads to the LDB still being active when the DI is getting enabled,
effectively reversing the required enable sequence. Fix this by also
disabling the LDB on driver bind.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/imx/imx-ldb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index abacc8f67469..e1226b71cbda 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -526,6 +526,9 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(imx_ldb->regmap);
 	}
 
+	/* disable LDB by resetting the control register to POR default */
+	regmap_write(imx_ldb->regmap, IOMUXC_GPR2, 0);
+
 	imx_ldb->dev = dev;
 
 	if (of_id)

From 0ec60fd99a94996767c455cb34d49e946f7d3687 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Wed, 11 Apr 2018 17:31:36 +0200
Subject: [PATCH 449/519] drm/imx: imx-ldb: check if channel is enabled before
 printing warning

[ Upstream commit c80d673b91a6c81d765864e10f2b15110ee900ad ]

If the second LVDS channel has been disabled in the DT when using dual-channel
mode we should not print a warning.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/imx/imx-ldb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index e1226b71cbda..31ca56e593f5 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -569,14 +569,14 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		if (ret || i < 0 || i > 1)
 			return -EINVAL;
 
+		if (!of_device_is_available(child))
+			continue;
+
 		if (dual && i > 0) {
 			dev_warn(dev, "dual-channel mode, ignoring second output\n");
 			continue;
 		}
 
-		if (!of_device_is_available(child))
-			continue;
-
 		channel = &imx_ldb->channel[i];
 		channel->ldb = imx_ldb;
 		channel->chno = i;

From 9ffc4d3233da61a674c6e6bff122346e7d7fa679 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Wed, 20 Jun 2018 11:54:53 +0800
Subject: [PATCH 450/519] usb: gadget: r8a66597: Fix two possible
 sleep-in-atomic-context bugs in init_controller()

[ Upstream commit 0602088b10a7c0b4e044a810678ef93d7cc5bf48 ]

The driver may sleep with holding a spinlock.
The function call paths (from bottom to top) in Linux-4.16.7 are:

[FUNC] msleep
drivers/usb/gadget/udc/r8a66597-udc.c, 839:
		msleep in init_controller
drivers/usb/gadget/udc/r8a66597-udc.c, 96:
		init_controller in r8a66597_usb_disconnect
drivers/usb/gadget/udc/r8a66597-udc.c, 93:
		spin_lock in r8a66597_usb_disconnect

[FUNC] msleep
drivers/usb/gadget/udc/r8a66597-udc.c, 835:
		msleep in init_controller
drivers/usb/gadget/udc/r8a66597-udc.c, 96:
		init_controller in r8a66597_usb_disconnect
drivers/usb/gadget/udc/r8a66597-udc.c, 93:
		spin_lock in r8a66597_usb_disconnect

To fix these bugs, msleep() is replaced with mdelay().

This bug is found by my static analysis tool (DSAC-2) and checked by
my code review.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/r8a66597-udc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c
index baa0609a429d..c7206a3b13c7 100644
--- a/drivers/usb/gadget/udc/r8a66597-udc.c
+++ b/drivers/usb/gadget/udc/r8a66597-udc.c
@@ -835,11 +835,11 @@ static void init_controller(struct r8a66597 *r8a66597)
 
 		r8a66597_bset(r8a66597, XCKE, SYSCFG0);
 
-		msleep(3);
+		mdelay(3);
 
 		r8a66597_bset(r8a66597, PLLC, SYSCFG0);
 
-		msleep(1);
+		mdelay(1);
 
 		r8a66597_bset(r8a66597, SCKE, SYSCFG0);
 

From 68f4f658ef0a1d49b13de2ae991b13a93df675e6 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Wed, 20 Jun 2018 11:55:08 +0800
Subject: [PATCH 451/519] usb: gadget: r8a66597: Fix a possible
 sleep-in-atomic-context bugs in r8a66597_queue()

[ Upstream commit f36b507c14c4b6e634463a610294e9cb0065c8ea ]

The driver may sleep in an interrupt handler.
The function call path (from bottom to top) in Linux-4.16.7 is:

[FUNC] r8a66597_queue(GFP_KERNEL)
drivers/usb/gadget/udc/r8a66597-udc.c, 1193:
		r8a66597_queue in get_status
drivers/usb/gadget/udc/r8a66597-udc.c, 1301:
		get_status in setup_packet
drivers/usb/gadget/udc/r8a66597-udc.c, 1381:
		setup_packet in irq_control_stage
drivers/usb/gadget/udc/r8a66597-udc.c, 1508:
		irq_control_stage in r8a66597_irq (interrupt handler)

To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC.

This bug is found by my static analysis tool (DSAC-2) and checked by
my code review.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/r8a66597-udc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c
index c7206a3b13c7..e34094647603 100644
--- a/drivers/usb/gadget/udc/r8a66597-udc.c
+++ b/drivers/usb/gadget/udc/r8a66597-udc.c
@@ -1193,7 +1193,7 @@ __acquires(r8a66597->lock)
 	r8a66597->ep0_req->length = 2;
 	/* AV: what happens if we get called again before that gets through? */
 	spin_unlock(&r8a66597->lock);
-	r8a66597_queue(r8a66597->gadget.ep0, r8a66597->ep0_req, GFP_KERNEL);
+	r8a66597_queue(r8a66597->gadget.ep0, r8a66597->ep0_req, GFP_ATOMIC);
 	spin_lock(&r8a66597->lock);
 }
 

From 5e02503f2a99621059daaf176e970b083648e0bb Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 15 Jul 2018 10:37:37 -0700
Subject: [PATCH 452/519] usb/phy: fix PPC64 build errors in phy-fsl-usb.c

[ Upstream commit a39ba90a1cc7010edb0a7132e1b67f3d80b994e9 ]

Fix build errors when built for PPC64:
These variables are only used on PPC32 so they don't need to be
initialized for PPC64.

../drivers/usb/phy/phy-fsl-usb.c: In function 'usb_otg_start':
../drivers/usb/phy/phy-fsl-usb.c:865:3: error: '_fsl_readl' undeclared (first use in this function); did you mean 'fsl_readl'?
   _fsl_readl = _fsl_readl_be;
../drivers/usb/phy/phy-fsl-usb.c:865:16: error: '_fsl_readl_be' undeclared (first use in this function); did you mean 'fsl_readl'?
   _fsl_readl = _fsl_readl_be;
../drivers/usb/phy/phy-fsl-usb.c:866:3: error: '_fsl_writel' undeclared (first use in this function); did you mean 'fsl_writel'?
   _fsl_writel = _fsl_writel_be;
../drivers/usb/phy/phy-fsl-usb.c:866:17: error: '_fsl_writel_be' undeclared (first use in this function); did you mean 'fsl_writel'?
   _fsl_writel = _fsl_writel_be;
../drivers/usb/phy/phy-fsl-usb.c:868:16: error: '_fsl_readl_le' undeclared (first use in this function); did you mean 'fsl_readl'?
   _fsl_readl = _fsl_readl_le;
../drivers/usb/phy/phy-fsl-usb.c:869:17: error: '_fsl_writel_le' undeclared (first use in this function); did you mean 'fsl_writel'?
   _fsl_writel = _fsl_writel_le;

and the sysfs "show" function return type should be ssize_t, not int:

../drivers/usb/phy/phy-fsl-usb.c:1042:49: error: initialization of 'ssize_t (*)(struct device *, struct device_attribute *, char *)' {aka 'long int (*)(struct device *, struct device_attribute *, char *)'} from incompatible pointer type 'int (*)(struct device *, struct device_attribute *, char *)' [-Werror=incompatible-pointer-types]
 static DEVICE_ATTR(fsl_usb2_otg_state, S_IRUGO, show_fsl_usb2_otg_state, NULL);

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: linux-usb@vger.kernel.org
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-fsl-usb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/phy/phy-fsl-usb.c b/drivers/usb/phy/phy-fsl-usb.c
index 94eb2923afed..85d031ce85c1 100644
--- a/drivers/usb/phy/phy-fsl-usb.c
+++ b/drivers/usb/phy/phy-fsl-usb.c
@@ -879,6 +879,7 @@ int usb_otg_start(struct platform_device *pdev)
 	if (pdata->init && pdata->init(pdev) != 0)
 		return -EINVAL;
 
+#ifdef CONFIG_PPC32
 	if (pdata->big_endian_mmio) {
 		_fsl_readl = _fsl_readl_be;
 		_fsl_writel = _fsl_writel_be;
@@ -886,6 +887,7 @@ int usb_otg_start(struct platform_device *pdev)
 		_fsl_readl = _fsl_readl_le;
 		_fsl_writel = _fsl_writel_le;
 	}
+#endif
 
 	/* request irq */
 	p_otg->irq = platform_get_irq(pdev, 0);
@@ -976,7 +978,7 @@ int usb_otg_start(struct platform_device *pdev)
 /*
  * state file in sysfs
  */
-static int show_fsl_usb2_otg_state(struct device *dev,
+static ssize_t show_fsl_usb2_otg_state(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct otg_fsm *fsm = &fsl_otg_dev->fsm;

From 7b85bc4bea0f1c922315aaf9451c544b69dc20f6 Mon Sep 17 00:00:00 2001
From: Peter Senna Tschudin <peter.senna@gmail.com>
Date: Tue, 10 Jul 2018 16:01:45 +0200
Subject: [PATCH 453/519] tools: usb: ffs-test: Fix build on big endian systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit a2b22dddc7bb6110ac3b5ed1a60aa9279836fadb ]

The tools/usb/ffs-test.c file defines cpu_to_le16/32 by using the C
library htole16/32 function calls. However, cpu_to_le16/32 are used when
initializing structures, i.e in a context where a function call is not
allowed.

It works fine on little endian systems because htole16/32 are defined by
the C library as no-ops. But on big-endian systems, they are actually
doing something, which might involve calling a function, causing build
failures, such as:

   ffs-test.c:48:25: error: initializer element is not constant
    #define cpu_to_le32(x)  htole32(x)
                            ^~~~~~~
   ffs-test.c:128:12: note: in expansion of macro ‘cpu_to_le32’
      .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
               ^~~~~~~~~~~

To solve this, we code cpu_to_le16/32 in a way that allows them to be
used when initializing structures. This fix was imported from
meta-openembedded/android-tools/fix-big-endian-build.patch written by
Thomas Petazzoni <thomas.petazzoni@free-electrons.com>.

CC: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Peter Senna Tschudin <peter.senna@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/ffs-test.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
index 88d5e71be044..47dfa0b0fcd7 100644
--- a/tools/usb/ffs-test.c
+++ b/tools/usb/ffs-test.c
@@ -44,12 +44,25 @@
 
 /******************** Little Endian Handling ********************************/
 
-#define cpu_to_le16(x)  htole16(x)
-#define cpu_to_le32(x)  htole32(x)
+/*
+ * cpu_to_le16/32 are used when initializing structures, a context where a
+ * function call is not allowed. To solve this, we code cpu_to_le16/32 in a way
+ * that allows them to be used when initializing structures.
+ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define cpu_to_le16(x)  (x)
+#define cpu_to_le32(x)  (x)
+#else
+#define cpu_to_le16(x)  ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
+#define cpu_to_le32(x)  \
+	((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >>  8) | \
+	(((x) & 0x0000ff00u) <<  8) | (((x) & 0x000000ffu) << 24))
+#endif
+
 #define le32_to_cpu(x)  le32toh(x)
 #define le16_to_cpu(x)  le16toh(x)
 
-
 /******************** Messages and Errors ***********************************/
 
 static const char argv0[] = "ffs-test";

From 801a35b0963c1d1af581486ac0461ad41cea6a6f Mon Sep 17 00:00:00 2001
From: Eugeniu Rosca <roscaeugeniu@gmail.com>
Date: Mon, 2 Jul 2018 23:46:47 +0200
Subject: [PATCH 454/519] usb: gadget: f_uac2: fix endianness of 'struct
 cntrl_*_lay3'

[ Upstream commit eec24f2a0d4dc3b1d95a3ccd2feb523ede3ba775 ]

The list [1] of commits doing endianness fixes in USB subsystem is long
due to below quote from USB spec Revision 2.0 from April 27, 2000:

------------
8.1 Byte/Bit Ordering

Multiple byte fields in standard descriptors, requests, and responses
are interpreted as and moved over the bus in little-endian order, i.e.
LSB to MSB.
------------

This commit belongs to the same family.

[1] Example of endianness fixes in USB subsystem:
commit 14e1d56cbea6 ("usb: gadget: f_uac2: endianness fixes.")
commit 42370b821168 ("usb: gadget: f_uac1: endianness fixes.")
commit 63afd5cc7877 ("USB: chaoskey: fix Alea quirk on big-endian hosts")
commit 74098c4ac782 ("usb: gadget: acm: fix endianness in notifications")
commit cdd7928df0d2 ("ACM gadget: fix endianness in notifications")
commit 323ece54e076 ("cdc-wdm: fix endianness bug in debug statements")
commit e102609f1072 ("usb: gadget: uvc: Fix endianness mismatches")
       list goes on

Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver")
Signed-off-by: Eugeniu Rosca <erosca@de.adit-jv.com>
Reviewed-by: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>

Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_uac2.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index b5dab103be38..e931c3cb0840 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -941,14 +941,14 @@ static struct usb_descriptor_header *hs_audio_desc[] = {
 };
 
 struct cntrl_cur_lay3 {
-	__u32	dCUR;
+	__le32	dCUR;
 };
 
 struct cntrl_range_lay3 {
-	__u16	wNumSubRanges;
-	__u32	dMIN;
-	__u32	dMAX;
-	__u32	dRES;
+	__le16	wNumSubRanges;
+	__le32	dMIN;
+	__le32	dMAX;
+	__le32	dRES;
 } __packed;
 
 static inline void
@@ -1296,9 +1296,9 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr)
 		memset(&c, 0, sizeof(struct cntrl_cur_lay3));
 
 		if (entity_id == USB_IN_CLK_ID)
-			c.dCUR = p_srate;
+			c.dCUR = cpu_to_le32(p_srate);
 		else if (entity_id == USB_OUT_CLK_ID)
-			c.dCUR = c_srate;
+			c.dCUR = cpu_to_le32(c_srate);
 
 		value = min_t(unsigned, w_length, sizeof c);
 		memcpy(req->buf, &c, value);
@@ -1336,15 +1336,15 @@ in_rq_range(struct usb_function *fn, const struct usb_ctrlrequest *cr)
 
 	if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) {
 		if (entity_id == USB_IN_CLK_ID)
-			r.dMIN = p_srate;
+			r.dMIN = cpu_to_le32(p_srate);
 		else if (entity_id == USB_OUT_CLK_ID)
-			r.dMIN = c_srate;
+			r.dMIN = cpu_to_le32(c_srate);
 		else
 			return -EOPNOTSUPP;
 
 		r.dMAX = r.dMIN;
 		r.dRES = 0;
-		r.wNumSubRanges = 1;
+		r.wNumSubRanges = cpu_to_le16(1);
 
 		value = min_t(unsigned, w_length, sizeof r);
 		memcpy(req->buf, &r, value);

From 1fa903cb3b09af0c32888fbf8dde4d3def60fb6f Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 20 Jul 2018 14:47:03 -0400
Subject: [PATCH 455/519] tools/power turbostat: fix -S on UP systems

[ Upstream commit 9d83601a9cc1884d1b5706ee2acc661d558c6838 ]

The -S (system summary) option failed to print any data on a 1-processor system.

Reported-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/power/x86/turbostat/turbostat.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0dac7e05a6ac..36e0d255d1b6 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -663,9 +663,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
 	if (!printed || !summary_only)
 		print_header();
 
-	if (topo.num_cpus > 1)
-		format_counters(&average.threads, &average.cores,
-			&average.packages);
+	format_counters(&average.threads, &average.cores, &average.packages);
 
 	printed = 1;
 

From 7193329df84d8d346585e22ea1b800b8fa177124 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 19 Jul 2018 10:27:13 +0800
Subject: [PATCH 456/519] net: caif: Add a missing rcu_read_unlock() in
 caif_flow_cb

[ Upstream commit 64119e05f7b31e83e2555f6782e6cdc8f81c63f4 ]

Add a missing rcu_read_unlock in the error path

Fixes: c95567c80352 ("caif: added check for potential null return")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/caif/caif_dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index d730a0f68f46..a0443d40d677 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -131,8 +131,10 @@ static void caif_flow_cb(struct sk_buff *skb)
 	caifd = caif_get(skb->dev);
 
 	WARN_ON(caifd == NULL);
-	if (caifd == NULL)
+	if (!caifd) {
+		rcu_read_unlock();
 		return;
+	}
 
 	caifd_hold(caifd);
 	rcu_read_unlock();

From af130c7f1d432510034bdd10332188ec46dcc04c Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 18 Jul 2018 22:50:03 -0700
Subject: [PATCH 457/519] qed: Fix possible race for the link state value.

[ Upstream commit 58874c7b246109d8efb2b0099d1aa296d6bfc3fa ]

There's a possible race where driver can read link status in mid-transition
and see that virtual-link is up yet speed is 0. Since in this
mid-transition we're guaranteed to see a mailbox from MFW soon, we can
afford to treat this as link down.

Fixes: cc875c2e ("qed: Add link support")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 20d048cdcb88..c898006abb32 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -420,6 +420,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
 		break;
 	default:
 		p_link->speed = 0;
+		p_link->link_up = 0;
 	}
 
 	/* Correct speed according to bandwidth allocation */

From 405d3fdb6b8671d73377f619e1971ce4fece7a50 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 20 Jul 2018 19:30:57 +0200
Subject: [PATCH 458/519] atl1c: reserve min skb headroom

[ Upstream commit 6e56830776828d8ca9897fc4429eeab47c3bb432 ]

Got crash report with following backtrace:
BUG: unable to handle kernel paging request at ffff8801869daffe
RIP: 0010:[<ffffffff816429c4>]  [<ffffffff816429c4>] ip6_finish_output2+0x394/0x4c0
RSP: 0018:ffff880186c83a98  EFLAGS: 00010283
RAX: ffff8801869db00e ...
  [<ffffffff81644cdc>] ip6_finish_output+0x8c/0xf0
  [<ffffffff81644d97>] ip6_output+0x57/0x100
  [<ffffffff81643dc9>] ip6_forward+0x4b9/0x840
  [<ffffffff81645566>] ip6_rcv_finish+0x66/0xc0
  [<ffffffff81645db9>] ipv6_rcv+0x319/0x530
  [<ffffffff815892ac>] netif_receive_skb+0x1c/0x70
  [<ffffffffc0060bec>] atl1c_clean+0x1ec/0x310 [atl1c]
  ...

The bad access is in neigh_hh_output(), at skb->data - 16 (HH_DATA_MOD).
atl1c driver provided skb with no headroom, so 14 bytes (ethernet
header) got pulled, but then 16 are copied.

Reserve NET_SKB_PAD bytes headroom, like netdev_alloc_skb().

Compile tested only; I lack hardware.

Fixes: 7b7017642199 ("atl1c: Fix misuse of netdev_alloc_skb in refilling rx ring")
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 8b5988e210d5..c08d34f618b9 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1683,6 +1683,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
 	skb = build_skb(page_address(page) + adapter->rx_page_offset,
 			adapter->rx_frag_size);
 	if (likely(skb)) {
+		skb_reserve(skb, NET_SKB_PAD);
 		adapter->rx_page_offset += adapter->rx_frag_size;
 		if (adapter->rx_page_offset >= PAGE_SIZE)
 			adapter->rx_page = NULL;

From 66673aace3d6fb5539c7e4c6c2bb589ffed51dab Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 21 Jul 2018 12:59:25 -0700
Subject: [PATCH 459/519] net: prevent ISA drivers from building on PPC32

[ Upstream commit c9ce1fa1c24b08e13c2a3b5b1f94a19c9eaa982c ]

Prevent drivers from building on PPC32 if they use isa_bus_to_virt(),
isa_virt_to_bus(), or isa_page_to_bus(), which are not available and
thus cause build errors.

../drivers/net/ethernet/3com/3c515.c: In function 'corkscrew_open':
../drivers/net/ethernet/3com/3c515.c:824:9: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration]

../drivers/net/ethernet/amd/lance.c: In function 'lance_rx':
../drivers/net/ethernet/amd/lance.c:1203:23: error: implicit declaration of function 'isa_bus_to_virt'; did you mean 'bus_to_virt'? [-Werror=implicit-function-declaration]

../drivers/net/ethernet/amd/ni65.c: In function 'ni65_init_lance':
../drivers/net/ethernet/amd/ni65.c:585:20: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration]

../drivers/net/ethernet/cirrus/cs89x0.c: In function 'net_open':
../drivers/net/ethernet/cirrus/cs89x0.c:897:20: error: implicit declaration of function 'isa_virt_to_bus'; did you mean 'virt_to_bus'? [-Werror=implicit-function-declaration]

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/3com/Kconfig   | 2 +-
 drivers/net/ethernet/amd/Kconfig    | 4 ++--
 drivers/net/ethernet/cirrus/Kconfig | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index 5b7658bcf020..5c3ef9fc8207 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -32,7 +32,7 @@ config EL3
 
 config 3C515
 	tristate "3c515 ISA \"Fast EtherLink\""
-	depends on ISA && ISA_DMA_API
+	depends on ISA && ISA_DMA_API && !PPC32
 	---help---
 	  If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet
 	  network card, say Y here.
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 0038709fd317..ec59425fdbff 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -44,7 +44,7 @@ config AMD8111_ETH
 
 config LANCE
 	tristate "AMD LANCE and PCnet (AT1500 and NE2100) support"
-	depends on ISA && ISA_DMA_API && !ARM
+	depends on ISA && ISA_DMA_API && !ARM && !PPC32
 	---help---
 	  If you have a network (Ethernet) card of this type, say Y here.
 	  Some LinkSys cards are of this type.
@@ -138,7 +138,7 @@ config PCMCIA_NMCLAN
 
 config NI65
 	tristate "NI6510 support"
-	depends on ISA && ISA_DMA_API && !ARM
+	depends on ISA && ISA_DMA_API && !ARM && !PPC32
 	---help---
 	  If you have a network (Ethernet) card of this type, say Y here.
 
diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig
index 5ab912937aff..ec0b545197e2 100644
--- a/drivers/net/ethernet/cirrus/Kconfig
+++ b/drivers/net/ethernet/cirrus/Kconfig
@@ -19,6 +19,7 @@ if NET_VENDOR_CIRRUS
 config CS89x0
 	tristate "CS89x0 support"
 	depends on ISA || EISA || ARM
+	depends on !PPC32
 	---help---
 	  Support for CS89x0 chipset based Ethernet cards. If you have a
 	  network (Ethernet) card of this type, say Y and read the file

From ae34cbea8c5378ca3b2a423260f4bc80e04a49d7 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Mon, 9 Jul 2018 21:16:40 +0200
Subject: [PATCH 460/519] can: mpc5xxx_can: check of_iomap return before use

[ Upstream commit b5c1a23b17e563b656cc9bb76ce5323b997d90e8 ]

of_iomap() can return NULL so that return needs to be checked and NULL
treated as failure. While at it also take care of the missing
of_node_put() in the error path.

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Fixes: commit afa17a500a36 ("net/can: add driver for mscan family & mpc52xx_mscan")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/mscan/mpc5xxx_can.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index c7427bdd3a4b..2949a381a94d 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -86,6 +86,11 @@ static u32 mpc52xx_can_get_clock(struct platform_device *ofdev,
 		return 0;
 	}
 	cdm = of_iomap(np_cdm, 0);
+	if (!cdm) {
+		of_node_put(np_cdm);
+		dev_err(&ofdev->dev, "can't map clock node!\n");
+		return 0;
+	}
 
 	if (in_8(&cdm->ipb_clk_sel) & 0x1)
 		freq *= 2;

From ca08b42d1f69d4af1037dbb0091120cc583c6d29 Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Date: Fri, 13 Jul 2018 17:20:17 +0200
Subject: [PATCH 461/519] i2c: davinci: Avoid zero value of CLKH

[ Upstream commit cc8de9a68599b261244ea453b38678229f06ada7 ]

If CLKH is set to 0 I2C clock is not generated at all, so avoid this value
and stretch the clock in this case.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-davinci.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index a8bdcb5292f5..57f6eb1427b4 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -234,12 +234,16 @@ static void i2c_davinci_calc_clk_dividers(struct davinci_i2c_dev *dev)
 	/*
 	 * It's not always possible to have 1 to 2 ratio when d=7, so fall back
 	 * to minimal possible clkh in this case.
+	 *
+	 * Note:
+	 * CLKH is not allowed to be 0, in this case I2C clock is not generated
+	 * at all
 	 */
-	if (clk >= clkl + d) {
+	if (clk > clkl + d) {
 		clkh = clk - clkl - d;
 		clkl -= d;
 	} else {
-		clkh = 0;
+		clkh = 1;
 		clkl = clk - (d << 1);
 	}
 

From b7e90154dc16d9538067fe7012a14447d341c43a Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 23 Jul 2018 14:39:33 -0700
Subject: [PATCH 462/519] media: staging: omap4iss: Include asm/cacheflush.h
 after generic includes

[ Upstream commit 0894da849f145af51bde88a6b84f95b9c9e0bc66 ]

Including asm/cacheflush.h first results in the following build error
when trying to build sparc32:allmodconfig, because 'struct page' has not
been declared, and the function declaration ends up creating a separate
(private) declaration of struct page (as a result of function arguments
being in the scope of the function declaration and definition, not in
global scope).

The C scoping rules do not just affect variable visibility, they also
affect type declaration visibility.

The end result is that when the actual call site is seen in
<linux/highmem.h>, the 'struct page' type in the caller is not the same
'struct page' that the function was declared with, resulting in:

  In file included from arch/sparc/include/asm/page.h:10:0,
                   ...
                   from drivers/staging/media/omap4iss/iss_video.c:15:
  include/linux/highmem.h: In function 'clear_user_highpage':
  include/linux/highmem.h:137:31: error:
	passing argument 1 of 'sparc_flush_page_to_ram' from incompatible
	pointer type

Include generic includes files first to fix the problem.

Fixes: fc96d58c10162 ("[media] v4l: omap4iss: Add support for OMAP4 camera interface - Video devices")
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
[ Added explanation of C scope rules - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/media/omap4iss/iss_video.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c
index 2a0158bb4974..5a78ef057635 100644
--- a/drivers/staging/media/omap4iss/iss_video.c
+++ b/drivers/staging/media/omap4iss/iss_video.c
@@ -11,7 +11,6 @@
  * (at your option) any later version.
  */
 
-#include <asm/cacheflush.h>
 #include <linux/clk.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -22,6 +21,8 @@
 #include <media/v4l2-dev.h>
 #include <media/v4l2-ioctl.h>
 
+#include <asm/cacheflush.h>
+
 #include "iss_video.h"
 #include "iss.h"
 

From 21aee5fe5b4b399123d28628ca31c7823d532887 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Tue, 24 Jul 2018 02:43:52 -0700
Subject: [PATCH 463/519] bnx2x: Fix invalid memory access in rss hash config
 path.

[ Upstream commit ae2dcb28c24794a87e424a726a1cf1a61980f52d ]

Rx hash/filter table configuration uses rss_conf_obj to configure filters
in the hardware. This object is initialized only when the interface is
brought up.
This patch adds driver changes to configure rss params only when the device
is in opened state. In port disabled case, the config will be cached in the
driver structure which will be applied in the successive load path.

Please consider applying it to 'net' branch.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index d84efcd34fac..c56b61dce2d1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -3360,14 +3360,18 @@ static int bnx2x_set_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info)
 			DP(BNX2X_MSG_ETHTOOL,
 			   "rss re-configured, UDP 4-tupple %s\n",
 			   udp_rss_requested ? "enabled" : "disabled");
-			return bnx2x_rss(bp, &bp->rss_conf_obj, false, true);
+			if (bp->state == BNX2X_STATE_OPEN)
+				return bnx2x_rss(bp, &bp->rss_conf_obj, false,
+						 true);
 		} else if ((info->flow_type == UDP_V6_FLOW) &&
 			   (bp->rss_conf_obj.udp_rss_v6 != udp_rss_requested)) {
 			bp->rss_conf_obj.udp_rss_v6 = udp_rss_requested;
 			DP(BNX2X_MSG_ETHTOOL,
 			   "rss re-configured, UDP 4-tupple %s\n",
 			   udp_rss_requested ? "enabled" : "disabled");
-			return bnx2x_rss(bp, &bp->rss_conf_obj, false, true);
+			if (bp->state == BNX2X_STATE_OPEN)
+				return bnx2x_rss(bp, &bp->rss_conf_obj, false,
+						 true);
 		}
 		return 0;
 
@@ -3481,7 +3485,10 @@ static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir,
 		bp->rss_conf_obj.ind_table[i] = indir[i] + bp->fp->cl_id;
 	}
 
-	return bnx2x_config_rss_eth(bp, false);
+	if (bp->state == BNX2X_STATE_OPEN)
+		return bnx2x_config_rss_eth(bp, false);
+
+	return 0;
 }
 
 /**

From 7022f61c15dc06efe8d4af4711dac139f2eea121 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
Date: Tue, 24 Jul 2018 10:09:53 +0530
Subject: [PATCH 464/519] net: axienet: Fix double deregister of mdio

[ Upstream commit 03bc7cab7d7218088412a75e141696a89059ab00 ]

If the registration fails then mdio_unregister is called.
However at unbind the unregister ia attempted again resulting
in the below crash

[   73.544038] kernel BUG at drivers/net/phy/mdio_bus.c:415!
[   73.549362] Internal error: Oops - BUG: 0 [#1] SMP
[   73.554127] Modules linked in:
[   73.557168] CPU: 0 PID: 2249 Comm: sh Not tainted 4.14.0 #183
[   73.562895] Hardware name: xlnx,zynqmp (DT)
[   73.567062] task: ffffffc879e41180 task.stack: ffffff800cbe0000
[   73.572973] PC is at mdiobus_unregister+0x84/0x88
[   73.577656] LR is at axienet_mdio_teardown+0x18/0x30
[   73.582601] pc : [<ffffff80085fa4cc>] lr : [<ffffff8008616858>]
pstate: 20000145
[   73.589981] sp : ffffff800cbe3c30
[   73.593277] x29: ffffff800cbe3c30 x28: ffffffc879e41180
[   73.598573] x27: ffffff8008a21000 x26: 0000000000000040
[   73.603868] x25: 0000000000000124 x24: ffffffc879efe920
[   73.609164] x23: 0000000000000060 x22: ffffffc879e02000
[   73.614459] x21: ffffffc879e02800 x20: ffffffc87b0b8870
[   73.619754] x19: ffffffc879e02800 x18: 000000000000025d
[   73.625050] x17: 0000007f9a719ad0 x16: ffffff8008195bd8
[   73.630345] x15: 0000007f9a6b3d00 x14: 0000000000000010
[   73.635640] x13: 74656e7265687465 x12: 0000000000000030
[   73.640935] x11: 0000000000000030 x10: 0101010101010101
[   73.646231] x9 : 241f394f42533300 x8 : ffffffc8799f6e98
[   73.651526] x7 : ffffffc8799f6f18 x6 : ffffffc87b0ba318
[   73.656822] x5 : ffffffc87b0ba498 x4 : 0000000000000000
[   73.662117] x3 : 0000000000000000 x2 : 0000000000000008
[   73.667412] x1 : 0000000000000004 x0 : ffffffc8799f4000
[   73.672708] Process sh (pid: 2249, stack limit = 0xffffff800cbe0000)

Fix the same by making the bus NULL on unregister.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 507bbb0355c2..f6108413adba 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
@@ -218,6 +218,7 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
 	ret = of_mdiobus_register(bus, np1);
 	if (ret) {
 		mdiobus_free(bus);
+		lp->mii_bus = NULL;
 		return ret;
 	}
 	return 0;

From 58b96dea990fa8d440e55e1ccb55ef568ac5cf52 Mon Sep 17 00:00:00 2001
From: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Date: Wed, 25 Jul 2018 14:31:20 +0100
Subject: [PATCH 465/519] fscache: Allow cancelled operations to be enqueued

[ Upstream commit d0eb06afe712b7b103b6361f40a9a0c638524669 ]

Alter the state-check assertion in fscache_enqueue_operation() to allow
cancelled operations to be given processing time so they can be cleaned up.

Also fix a debugging statement that was requiring such operations to have
an object assigned.

Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem")
Reported-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fscache/operation.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index de67745e1cd7..77946d6f617d 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -66,7 +66,8 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 	ASSERT(op->processor != NULL);
 	ASSERT(fscache_object_is_available(op->object));
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
-	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
+	ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS,
+		    op->state, ==,  FSCACHE_OP_ST_CANCELLED);
 
 	fscache_stat(&fscache_n_op_enqueue);
 	switch (op->flags & FSCACHE_OP_TYPE) {
@@ -481,7 +482,8 @@ void fscache_put_operation(struct fscache_operation *op)
 	struct fscache_cache *cache;
 
 	_enter("{OBJ%x OP%x,%d}",
-	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+	       op->object ? op->object->debug_id : 0,
+	       op->debug_id, atomic_read(&op->usage));
 
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 

From 06144b250b7a7981a7e60f5614e13777fea8dae7 Mon Sep 17 00:00:00 2001
From: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Date: Tue, 18 Jul 2017 16:25:49 -0700
Subject: [PATCH 466/519] cachefiles: Fix refcounting bug in backing-file read
 monitoring

[ Upstream commit 934140ab028713a61de8bca58c05332416d037d1 ]

cachefiles_read_waiter() has the right to access a 'monitor' object by
virtue of being called under the waitqueue lock for one of the pages in its
purview.  However, it has no ref on that monitor object or on the
associated operation.

What it is allowed to do is to move the monitor object to the operation's
to_do list, but once it drops the work_lock, it's actually no longer
permitted to access that object.  However, it is trying to enqueue the
retrieval operation for processing - but it can only do this via a pointer
in the monitor object, something it shouldn't be doing.

If it doesn't enqueue the operation, the operation may not get processed.
If the order is flipped so that the enqueue is first, then it's possible
for the work processor to look at the to_do list before the monitor is
enqueued upon it.

Fix this by getting a ref on the operation so that we can trust that it
will still be there once we've added the monitor to the to_do list and
dropped the work_lock.  The op can then be enqueued after the lock is
dropped.

The bug can manifest in one of a couple of ways.  The first manifestation
looks like:

 FS-Cache:
 FS-Cache: Assertion failed
 FS-Cache: 6 == 5 is false
 ------------[ cut here ]------------
 kernel BUG at fs/fscache/operation.c:494!
 RIP: 0010:fscache_put_operation+0x1e3/0x1f0
 ...
 fscache_op_work_func+0x26/0x50
 process_one_work+0x131/0x290
 worker_thread+0x45/0x360
 kthread+0xf8/0x130
 ? create_worker+0x190/0x190
 ? kthread_cancel_work_sync+0x10/0x10
 ret_from_fork+0x1f/0x30

This is due to the operation being in the DEAD state (6) rather than
INITIALISED, COMPLETE or CANCELLED (5) because it's already passed through
fscache_put_operation().

The bug can also manifest like the following:

 kernel BUG at fs/fscache/operation.c:69!
 ...
    [exception RIP: fscache_enqueue_operation+246]
 ...
 #7 [ffff883fff083c10] fscache_enqueue_operation at ffffffffa0b793c6
 #8 [ffff883fff083c28] cachefiles_read_waiter at ffffffffa0b15a48
 #9 [ffff883fff083c48] __wake_up_common at ffffffff810af028

I'm not entirely certain as to which is line 69 in Lei's kernel, so I'm not
entirely clear which assertion failed.

Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem")
Reported-by: Lei Xue <carmark.dlut@gmail.com>
Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Reported-by: Anthony DeRobertis <aderobertis@metrics.net>
Reported-by: NeilBrown <neilb@suse.com>
Reported-by: Daniel Axtens <dja@axtens.net>
Reported-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cachefiles/rdwr.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c0f3da3926a0..5b68cf526887 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
 	struct cachefiles_one_read *monitor =
 		container_of(wait, struct cachefiles_one_read, monitor);
 	struct cachefiles_object *object;
+	struct fscache_retrieval *op = monitor->op;
 	struct wait_bit_key *key = _key;
 	struct page *page = wait->private;
 
@@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
 	list_del(&wait->task_list);
 
 	/* move onto the action list and queue for FS-Cache thread pool */
-	ASSERT(monitor->op);
+	ASSERT(op);
 
-	object = container_of(monitor->op->op.object,
-			      struct cachefiles_object, fscache);
+	/* We need to temporarily bump the usage count as we don't own a ref
+	 * here otherwise cachefiles_read_copier() may free the op between the
+	 * monitor being enqueued on the op->to_do list and the op getting
+	 * enqueued on the work queue.
+	 */
+	fscache_get_retrieval(op);
 
+	object = container_of(op->op.object, struct cachefiles_object, fscache);
 	spin_lock(&object->work_lock);
-	list_add_tail(&monitor->op_link, &monitor->op->to_do);
+	list_add_tail(&monitor->op_link, &op->to_do);
 	spin_unlock(&object->work_lock);
 
-	fscache_enqueue_retrieval(monitor->op);
+	fscache_enqueue_retrieval(op);
+	fscache_put_retrieval(op);
 	return 0;
 }
 

From 9dbea3f6b80638031e4c65285f0b9a60504f1f43 Mon Sep 17 00:00:00 2001
From: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Date: Thu, 21 Jun 2018 13:25:53 -0700
Subject: [PATCH 467/519] cachefiles: Wait rather than BUG'ing on "Unexpected
 object collision"

[ Upstream commit c2412ac45a8f8f1cd582723c1a139608694d410d ]

If we meet a conflicting object that is marked FSCACHE_OBJECT_IS_LIVE in
the active object tree, we have been emitting a BUG after logging
information about it and the new object.

Instead, we should wait for the CACHEFILES_OBJECT_ACTIVE flag to be cleared
on the old object (or return an error).  The ACTIVE flag should be cleared
after it has been removed from the active object tree.  A timeout of 60s is
used in the wait, so we shouldn't be able to get stuck there.

Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem")
Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cachefiles/namei.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c4b893453e0e..c43b4b08546b 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -194,7 +194,6 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
 		pr_err("\n");
 		pr_err("Error: Unexpected object collision\n");
 		cachefiles_printk_object(object, xobject);
-		BUG();
 	}
 	atomic_inc(&xobject->usage);
 	write_unlock(&cache->active_lock);

From b142f71f17dbd1c203d51b975b98945e7dc310fd Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 14 Jul 2018 01:28:44 +0900
Subject: [PATCH 468/519] selftests/ftrace: Add snapshot and tracing_on test
 case

[ Upstream commit 82f4f3e69c5c29bce940dd87a2c0f16c51d48d17 ]

Add a testcase for checking snapshot and tracing_on
relationship. This ensures that the snapshotting doesn't
affect current tracing on/off settings.

Link: http://lkml.kernel.org/r/153149932412.11274.15289227592627901488.stgit@devbox

Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Hiraku Toyooka <hiraku.toyooka@cybertrust.co.jp>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ftrace/test.d/00basic/snapshot.tc         | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc

diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
new file mode 100644
index 000000000000..3b1f45e13a2e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Snapshot and tracing setting
+# flags: instance
+
+[ ! -f snapshot ] && exit_unsupported
+
+echo "Set tracing off"
+echo 0 > tracing_on
+
+echo "Allocate and take a snapshot"
+echo 1 > snapshot
+
+# Since trace buffer is empty, snapshot is also empty, but allocated
+grep -q "Snapshot is allocated" snapshot
+
+echo "Ensure keep tracing off"
+test `cat tracing_on` -eq 0
+
+echo "Set tracing on"
+echo 1 > tracing_on
+
+echo "Take a snapshot again"
+echo 1 > snapshot
+
+echo "Ensure keep tracing on"
+test `cat tracing_on` -eq 1
+
+exit 0

From 0d35e0188a97a89cf92d16250e505d04707d56b3 Mon Sep 17 00:00:00 2001
From: Li Wang <liwang@redhat.com>
Date: Thu, 26 Jul 2018 16:37:42 -0700
Subject: [PATCH 469/519] zswap: re-check zswap_is_full() after do
 zswap_shrink()

[ Upstream commit 16e536ef47f567289a5699abee9ff7bb304bc12d ]

/sys/../zswap/stored_pages keeps rising in a zswap test with
"zswap.max_pool_percent=0" parameter.  But it should not compress or
store pages any more since there is no space in the compressed pool.

Reproduce steps:
  1. Boot kernel with "zswap.enabled=1"
  2. Set the max_pool_percent to 0
      # echo 0 > /sys/module/zswap/parameters/max_pool_percent
  3. Do memory stress test to see if some pages have been compressed
      # stress --vm 1 --vm-bytes $mem_available"M" --timeout 60s
  4. Watching the 'stored_pages' number increasing or not

The root cause is:

  When zswap_max_pool_percent is set to 0 via kernel parameter,
  zswap_is_full() will always return true due to zswap_shrink().  But if
  the shinking is able to reclain a page successfully the code then
  proceeds to compressing/storing another page, so the value of
  stored_pages will keep changing.

To solve the issue, this patch adds a zswap_is_full() check again after
  zswap_shrink() to make sure it's now under the max_pool_percent, and to
  not compress/store if we reached the limit.

Link: http://lkml.kernel.org/r/20180530103936.17812-1-liwang@redhat.com
Signed-off-by: Li Wang <liwang@redhat.com>
Acked-by: Dan Streetman <ddstreet@ieee.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Huang Ying <huang.ying.caritas@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/zswap.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/mm/zswap.c b/mm/zswap.c
index 568015e2fe7a..87a8491909ee 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1018,6 +1018,15 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 			ret = -ENOMEM;
 			goto reject;
 		}
+
+		/* A second zswap_is_full() check after
+		 * zswap_shrink() to make sure it's now
+		 * under the max_pool_percent
+		 */
+		if (zswap_is_full()) {
+			ret = -ENOMEM;
+			goto reject;
+		}
 	}
 
 	/* allocate entry */

From 27629a00ad18a05669e19ec5f48b0c6d33dba61a Mon Sep 17 00:00:00 2001
From: Calvin Walton <calvin.walton@kepstin.ca>
Date: Fri, 27 Jul 2018 07:50:53 -0400
Subject: [PATCH 470/519] tools/power turbostat: Read extended processor family
 from CPUID

[ Upstream commit 5aa3d1a20a233d4a5f1ec3d62da3f19d9afea682 ]

This fixes the reported family on modern AMD processors (e.g. Ryzen,
which is family 0x17). Previously these processors all showed up as
family 0xf.

See the document
https://support.amd.com/TechDocs/56255_OSRR.pdf
section CPUID_Fn00000001_EAX for how to calculate the family
from the BaseFamily and ExtFamily values.

This matches the code in arch/x86/lib/cpu.c

Signed-off-by: Calvin Walton <calvin.walton@kepstin.ca>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/power/x86/turbostat/turbostat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 36e0d255d1b6..33c79e415075 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2691,7 +2691,9 @@ void process_cpuid()
 	family = (fms >> 8) & 0xf;
 	model = (fms >> 4) & 0xf;
 	stepping = fms & 0xf;
-	if (family == 6 || family == 0xf)
+	if (family == 0xf)
+		family += (fms >> 20) & 0xff;
+	if (family >= 6)
 		model += ((fms >> 16) & 0xf) << 4;
 
 	if (debug)

From 7d1a2eef5e468fdeca2c443445ff792c2e58bffa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 27 Jul 2018 13:13:39 +0200
Subject: [PATCH 471/519] Revert "MIPS: BCM47XX: Enable 74K Core ExternalSync
 for PCIe erratum"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit d5ea019f8a381f88545bb26993b62ec24a2796b7 ]

This reverts commit 2a027b47dba6 ("MIPS: BCM47XX: Enable 74K Core
ExternalSync for PCIe erratum").

Enabling ExternalSync caused a regression for BCM4718A1 (used e.g. in
Netgear E3000 and ASUS RT-N16): it simply hangs during PCIe
initialization. It's likely that BCM4717A1 is also affected.

I didn't notice that earlier as the only BCM47XX devices with PCIe I
own are:
1) BCM4706 with 2 x 14e4:4331
2) BCM4706 with 14e4:4360 and 14e4:4331
it appears that BCM4706 is unaffected.

While BCM5300X-ES300-RDS.pdf seems to document that erratum and its
workarounds (according to quotes provided by Tokunori) it seems not even
Broadcom follows them.

According to the provided info Broadcom should define CONF7_ES in their
SDK's mipsinc.h and implement workaround in the si_mips_init(). Checking
both didn't reveal such code. It *could* mean Broadcom also had some
problems with the given workaround.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Michael Marley <michael@michaelmarley.com>
Patchwork: https://patchwork.linux-mips.org/patch/20032/
URL: https://bugs.openwrt.org/index.php?do=details&task_id=1688
Cc: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/bcm47xx/setup.c        | 6 ------
 arch/mips/include/asm/mipsregs.h | 3 ---
 2 files changed, 9 deletions(-)

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 4ca33175ec05..6d38948f0f1e 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -249,12 +249,6 @@ static int __init bcm47xx_cpu_fixes(void)
 		 */
 		if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706)
 			cpu_wait = NULL;
-
-		/*
-		 * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail"
-		 * Enable ExternalSync for sync instruction to take effect
-		 */
-		set_c0_config7(MIPS_CONF7_ES);
 		break;
 #endif
 	}
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 15c183ce9d4f..e43aca183c99 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -605,8 +605,6 @@
 #define MIPS_CONF7_WII		(_ULCAST_(1) << 31)
 
 #define MIPS_CONF7_RPS		(_ULCAST_(1) << 2)
-/* ExternalSync */
-#define MIPS_CONF7_ES		(_ULCAST_(1) << 8)
 
 #define MIPS_CONF7_IAR		(_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR		(_ULCAST_(1) << 16)
@@ -2014,7 +2012,6 @@ __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
 __BUILD_SET_C0(config5)
-__BUILD_SET_C0(config7)
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)

From cae83b4d45d522f3b2cb2c091ff46fdb79bd68bd Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Fri, 27 Jul 2018 11:19:29 -0700
Subject: [PATCH 472/519] enic: handle mtu change for vf properly

[ Upstream commit ab123fe071c9aa9680ecd62eb080eb26cff4892c ]

When driver gets notification for mtu change, driver does not handle it for
all RQs. It handles only RQ[0].

Fix is to use enic_change_mtu() interface to change mtu for vf.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 78 +++++++--------------
 1 file changed, 27 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 8390597aecb8..b20bce2c7da1 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1842,10 +1842,32 @@ static int enic_stop(struct net_device *netdev)
 	return 0;
 }
 
+static int _enic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	bool running = netif_running(netdev);
+	int err = 0;
+
+	ASSERT_RTNL();
+	if (running) {
+		err = enic_stop(netdev);
+		if (err)
+			return err;
+	}
+
+	netdev->mtu = new_mtu;
+
+	if (running) {
+		err = enic_open(netdev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int enic_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct enic *enic = netdev_priv(netdev);
-	int running = netif_running(netdev);
 
 	if (new_mtu < ENIC_MIN_MTU || new_mtu > ENIC_MAX_MTU)
 		return -EINVAL;
@@ -1853,20 +1875,12 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu)
 	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
 		return -EOPNOTSUPP;
 
-	if (running)
-		enic_stop(netdev);
-
-	netdev->mtu = new_mtu;
-
 	if (netdev->mtu > enic->port_mtu)
 		netdev_warn(netdev,
-			"interface MTU (%d) set higher than port MTU (%d)\n",
-			netdev->mtu, enic->port_mtu);
+			    "interface MTU (%d) set higher than port MTU (%d)\n",
+			    netdev->mtu, enic->port_mtu);
 
-	if (running)
-		enic_open(netdev);
-
-	return 0;
+	return _enic_change_mtu(netdev, new_mtu);
 }
 
 static void enic_change_mtu_work(struct work_struct *work)
@@ -1874,47 +1888,9 @@ static void enic_change_mtu_work(struct work_struct *work)
 	struct enic *enic = container_of(work, struct enic, change_mtu_work);
 	struct net_device *netdev = enic->netdev;
 	int new_mtu = vnic_dev_mtu(enic->vdev);
-	int err;
-	unsigned int i;
-
-	new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu));
 
 	rtnl_lock();
-
-	/* Stop RQ */
-	del_timer_sync(&enic->notify_timer);
-
-	for (i = 0; i < enic->rq_count; i++)
-		napi_disable(&enic->napi[i]);
-
-	vnic_intr_mask(&enic->intr[0]);
-	enic_synchronize_irqs(enic);
-	err = vnic_rq_disable(&enic->rq[0]);
-	if (err) {
-		rtnl_unlock();
-		netdev_err(netdev, "Unable to disable RQ.\n");
-		return;
-	}
-	vnic_rq_clean(&enic->rq[0], enic_free_rq_buf);
-	vnic_cq_clean(&enic->cq[0]);
-	vnic_intr_clean(&enic->intr[0]);
-
-	/* Fill RQ with new_mtu-sized buffers */
-	netdev->mtu = new_mtu;
-	vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
-	/* Need at least one buffer on ring to get going */
-	if (vnic_rq_desc_used(&enic->rq[0]) == 0) {
-		rtnl_unlock();
-		netdev_err(netdev, "Unable to alloc receive buffers.\n");
-		return;
-	}
-
-	/* Start RQ */
-	vnic_rq_enable(&enic->rq[0]);
-	napi_enable(&enic->napi[0]);
-	vnic_intr_unmask(&enic->intr[0]);
-	enic_notify_timer_start(enic);
-
+	(void)_enic_change_mtu(netdev, new_mtu);
 	rtnl_unlock();
 
 	netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu);

From ef33fed0535aad84dffdfb978ae732ec4469c485 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 26 Jul 2018 20:16:35 -0700
Subject: [PATCH 473/519] arc: fix build errors in arc/include/asm/delay.h

[ Upstream commit 2423665ec53f2a29191b35382075e9834288a975 ]

Fix build errors in arch/arc/'s delay.h:
- add "extern unsigned long loops_per_jiffy;"
- add <asm-generic/types.h> for "u64"

In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32:
../arch/arc/include/asm/delay.h: In function '__udelay':
../arch/arc/include/asm/delay.h:61:12: error: 'u64' undeclared (first use in this function)
  loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;
            ^~~

In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32:
../arch/arc/include/asm/delay.h: In function '__udelay':
../arch/arc/include/asm/delay.h:63:37: error: 'loops_per_jiffy' undeclared (first use in this function)
  loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;
                                     ^~~~~~~~~~~~~~~

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: Elad Kanfi <eladkan@mellanox.com>
Cc: Leon Romanovsky <leonro@mellanox.com>
Cc: Ofer Levi <oferle@mellanox.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/delay.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index d5da2115d78a..03d6bb0f4e13 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -17,8 +17,11 @@
 #ifndef __ASM_ARC_UDELAY_H
 #define __ASM_ARC_UDELAY_H
 
+#include <asm-generic/types.h>
 #include <asm/param.h>		/* HZ */
 
+extern unsigned long loops_per_jiffy;
+
 static inline void __delay(unsigned long loops)
 {
 	__asm__ __volatile__(

From 21d36c9737183a995faca026f887b0a8dcd04a44 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 26 Jul 2018 20:16:35 -0700
Subject: [PATCH 474/519] arc: fix type warnings in arc/mm/cache.c

[ Upstream commit ec837d620c750c0d4996a907c8c4f7febe1bbeee ]

Fix type warnings in arch/arc/mm/cache.c.

../arch/arc/mm/cache.c: In function 'flush_anon_page':
../arch/arc/mm/cache.c:1062:55: warning: passing argument 2 of '__flush_dcache_page' makes integer from pointer without a cast [-Wint-conversion]
  __flush_dcache_page((phys_addr_t)page_address(page), page_address(page));
                                                       ^~~~~~~~~~~~~~~~~~
../arch/arc/mm/cache.c:1013:59: note: expected 'long unsigned int' but argument is of type 'void *'
 void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
                                             ~~~~~~~~~~~~~~^~~~~

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: Elad Kanfi <eladkan@mellanox.com>
Cc: Leon Romanovsky <leonro@mellanox.com>
Cc: Ofer Levi <oferle@mellanox.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/mm/cache.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 9a84cbdd44b0..017fb440bba4 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -821,7 +821,7 @@ void flush_cache_mm(struct mm_struct *mm)
 void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
 		      unsigned long pfn)
 {
-	unsigned int paddr = pfn << PAGE_SHIFT;
+	phys_addr_t paddr = pfn << PAGE_SHIFT;
 
 	u_vaddr &= PAGE_MASK;
 
@@ -841,8 +841,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
 		     unsigned long u_vaddr)
 {
 	/* TBD: do we really need to clear the kernel mapping */
-	__flush_dcache_page(page_address(page), u_vaddr);
-	__flush_dcache_page(page_address(page), page_address(page));
+	__flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
+	__flush_dcache_page((phys_addr_t)page_address(page),
+			    (phys_addr_t)page_address(page));
 
 }
 

From 3837d650c8ca8e5974760859834c4160be671556 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 1 Aug 2018 18:22:41 +0100
Subject: [PATCH 475/519] drivers: net: lmc: fix case value for target abort
 error

[ Upstream commit afb41bb039656f0cecb54eeb8b2e2088201295f5 ]

Current value for a target abort error is 0x010, however, this value
should in fact be 0x002.  As it stands, the range of error is 0..7 so
it is currently never being detected.  This bug has been in the driver
since the early 2.6.12 days (or before).

Detected by CoverityScan, CID#744290 ("Logically dead code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wan/lmc/lmc_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 317bc79cc8b9..c178e1218347 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1385,7 +1385,7 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/
             case 0x001:
                 printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name);
                 break;
-            case 0x010:
+            case 0x002:
                 printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name);
                 break;
             default:

From 621b2dd52dd92b77f61e3a15b634902ffbe54e1b Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Tue, 31 Jul 2018 15:46:02 +0200
Subject: [PATCH 476/519] scsi: fcoe: drop frames in ELS LOGO error path

[ Upstream commit 63d0e3dffda311e77b9a8c500d59084e960a824a ]

Drop the frames in the ELS LOGO error path instead of just returning an
error.

This fixes the following kmemleak report:
unreferenced object 0xffff880064cb1000 (size 424):
  comm "kworker/0:2", pid 24, jiffies 4294904293 (age 68.504s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<(____ptrval____)>] _fc_frame_alloc+0x2c/0x180 [libfc]
    [<(____ptrval____)>] fc_lport_enter_logo+0x106/0x360 [libfc]
    [<(____ptrval____)>] fc_fabric_logoff+0x8c/0xc0 [libfc]
    [<(____ptrval____)>] fcoe_if_destroy+0x79/0x3b0 [fcoe]
    [<(____ptrval____)>] fcoe_destroy_work+0xd2/0x170 [fcoe]
    [<(____ptrval____)>] process_one_work+0x7ff/0x1420
    [<(____ptrval____)>] worker_thread+0x87/0xef0
    [<(____ptrval____)>] kthread+0x2db/0x390
    [<(____ptrval____)>] ret_from_fork+0x35/0x40
    [<(____ptrval____)>] 0xffffffffffffffff

which can be triggered by issuing
echo eth0 > /sys/bus/fcoe/ctlr_destroy

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/fcoe/fcoe_ctlr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 34a1b1f333b4..d5184aa1ace4 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -752,9 +752,9 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
 	case ELS_LOGO:
 		if (fip->mode == FIP_MODE_VN2VN) {
 			if (fip->state != FIP_ST_VNMP_UP)
-				return -EINVAL;
+				goto drop;
 			if (ntoh24(fh->fh_d_id) == FC_FID_FLOGI)
-				return -EINVAL;
+				goto drop;
 		} else {
 			if (fip->state != FIP_ST_ENABLED)
 				return 0;

From 8bbbd3a9a0f8ea4b22150639a05d03000b72fe12 Mon Sep 17 00:00:00 2001
From: Jim Gill <jgill@vmware.com>
Date: Thu, 2 Aug 2018 14:13:30 -0700
Subject: [PATCH 477/519] scsi: vmw_pvscsi: Return DID_RESET for status
 SAM_STAT_COMMAND_TERMINATED

[ Upstream commit e95153b64d03c2b6e8d62e51bdcc33fcad6e0856 ]

Commands that are reset are returned with status
SAM_STAT_COMMAND_TERMINATED. PVSCSI currently returns DID_OK |
SAM_STAT_COMMAND_TERMINATED which fails the command. Instead, set hostbyte
to DID_RESET to allow upper layers to retry.

Tested by copying a large file between two pvscsi disks on same adapter
while performing a bus reset at 1-second intervals. Before fix, commands
sometimes fail with DID_OK. After fix, commands observed to fail with
DID_RESET.

Signed-off-by: Jim Gill <jgill@vmware.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/vmw_pvscsi.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 0f133c1817de..0de2f9069e23 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -545,9 +545,14 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
 	    (btstat == BTSTAT_SUCCESS ||
 	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
 	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
-		cmd->result = (DID_OK << 16) | sdstat;
-		if (sdstat == SAM_STAT_CHECK_CONDITION && cmd->sense_buffer)
-			cmd->result |= (DRIVER_SENSE << 24);
+		if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
+			cmd->result = (DID_RESET << 16);
+		} else {
+			cmd->result = (DID_OK << 16) | sdstat;
+			if (sdstat == SAM_STAT_CHECK_CONDITION &&
+			    cmd->sense_buffer)
+				cmd->result |= (DRIVER_SENSE << 24);
+		}
 	} else
 		switch (btstat) {
 		case BTSTAT_SUCCESS:

From fbee7b5b8c28ed02b6d6603eef27730c148a4481 Mon Sep 17 00:00:00 2001
From: "jie@chenjie6@huwei.com" <jie@chenjie6@huwei.com>
Date: Fri, 10 Aug 2018 17:23:06 -0700
Subject: [PATCH 478/519] mm/memory.c: check return value of ioremap_prot

[ Upstream commit 24eee1e4c47977bdfb71d6f15f6011e7b6188d04 ]

ioremap_prot() can return NULL which could lead to an oops.

Link: http://lkml.kernel.org/r/1533195441-58594-1-git-send-email-chenjie6@huawei.com
Signed-off-by: chen jie <chenjie6@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: chenjie <chenjie6@huawei.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memory.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/memory.c b/mm/memory.c
index d5bb1465d30c..42db644f5ec4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3701,6 +3701,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 		return -EINVAL;
 
 	maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+	if (!maddr)
+		return -ENOMEM;
+
 	if (write)
 		memcpy_toio(maddr + offset, buf, len);
 	else

From 58d141a9ad8668b267362b1d17296c9cfa053a01 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 28 Jun 2018 18:46:40 -0500
Subject: [PATCH 479/519] cifs: add missing debug entries for kconfig options

commit 950132afd59385caf6e2b84e5235d069fa10681d upstream.

/proc/fs/cifs/DebugData displays the features (Kconfig options)
used to build cifs.ko but it was missing some, and needed comma
separator.  These can be useful in debugging certain problems
so we know which optional features were enabled in the user's build.
Also clarify them, by making them more closely match the
corresponding CONFIG_CIFS_* parm.

Old format:
Features: dfs fscache posix spnego xattr acl

New format:
Features: DFS,FSCACHE,SMB_DIRECT,STATS,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifs_debug.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0a3544fb50f9..bcbe42fb7e92 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -123,25 +123,41 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
 	seq_printf(m, "Features:");
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	seq_printf(m, " dfs");
+	seq_printf(m, " DFS");
 #endif
 #ifdef CONFIG_CIFS_FSCACHE
-	seq_printf(m, " fscache");
+	seq_printf(m, ",FSCACHE");
+#endif
+#ifdef CONFIG_CIFS_SMB_DIRECT
+	seq_printf(m, ",SMB_DIRECT");
+#endif
+#ifdef CONFIG_CIFS_STATS2
+	seq_printf(m, ",STATS2");
+#elif defined(CONFIG_CIFS_STATS)
+	seq_printf(m, ",STATS");
+#endif
+#ifdef CONFIG_CIFS_DEBUG2
+	seq_printf(m, ",DEBUG2");
+#elif defined(CONFIG_CIFS_DEBUG)
+	seq_printf(m, ",DEBUG");
+#endif
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+	seq_printf(m, ",ALLOW_INSECURE_LEGACY");
 #endif
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-	seq_printf(m, " lanman");
+	seq_printf(m, ",WEAK_PW_HASH");
 #endif
 #ifdef CONFIG_CIFS_POSIX
-	seq_printf(m, " posix");
+	seq_printf(m, ",CIFS_POSIX");
 #endif
 #ifdef CONFIG_CIFS_UPCALL
-	seq_printf(m, " spnego");
+	seq_printf(m, ",UPCALL(SPNEGO)");
 #endif
 #ifdef CONFIG_CIFS_XATTR
-	seq_printf(m, " xattr");
+	seq_printf(m, ",XATTR");
 #endif
 #ifdef CONFIG_CIFS_ACL
-	seq_printf(m, " acl");
+	seq_printf(m, ",ACL");
 #endif
 	seq_putc(m, '\n');
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);

From 0e28220672bd699b04cb2ac0c1a39f4307ee3402 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Thu, 23 Aug 2018 12:24:02 +0200
Subject: [PATCH 480/519] cifs: check kmalloc before use

commit 126c97f4d0d1b5b956e8b0740c81a2b2a2ae548c upstream.

The kmalloc was not being checked - if it fails issue a warning
and return -ENOMEM to the caller.

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Fixes: b8da344b74c8 ("cifs: dynamic allocation of ntlmssp blob")
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
cc: Stable <stable@vger.kernel.org>`
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/sess.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index a035d1a95882..9bc7a29f88d6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -398,6 +398,12 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
 		goto setup_ntlmv2_ret;
 	}
 	*pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+	if (!*pbuffer) {
+		rc = -ENOMEM;
+		cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+		*buflen = 0;
+		goto setup_ntlmv2_ret;
+	}
 	sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
 
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);

From ff3bb182bc12ee1f852fd7f80dd8d9ce040f2462 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 2 Aug 2018 20:28:18 -0500
Subject: [PATCH 481/519] smb3: Do not send SMB3 SET_INFO if nothing changed

commit fd09b7d3b352105f08b8e02f7afecf7e816380ef upstream.

An earlier commit had a typo which prevented the
optimization from working:

commit 18dd8e1a65dd ("Do not send SMB3 SET_INFO request if nothing is changing")

Thank you to Metze for noticing this.  Also clear a
reserved field in the FILE_BASIC_INFO struct we send
that should be zero (all the other fields in that
struct were set or cleared explicitly already in
cifs_set_file_info).

Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org> # 4.9.x+
Reported-by: Stefan Metzmacher <metze@samba.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/inode.c     | 2 ++
 fs/cifs/smb2inode.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9cdeb0293267..36c8594bb147 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1063,6 +1063,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 	if (!server->ops->set_file_info)
 		return -ENOSYS;
 
+	info_buf.Pad = 0;
+
 	if (attrs->ia_valid & ATTR_ATIME) {
 		set_time = true;
 		info_buf.LastAccessTime =
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1238cd3552f9..0267d8cbc996 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -267,7 +267,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
 	int rc;
 
 	if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
-	    (buf->LastWriteTime == 0) && (buf->ChangeTime) &&
+	    (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) &&
 	    (buf->Attributes == 0))
 		return 0; /* would be a no op, no sense sending this */
 

From 951461680e88afb347cfb1e53255cd1b50187e2e Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 27 Jul 2018 22:01:49 -0500
Subject: [PATCH 482/519] smb3: don't request leases in symlink creation and
 query

commit 22783155f4bf956c346a81624ec9258930a6fe06 upstream.

Fixes problem pointed out by Pavel in discussions about commit
729c0c9dd55204f0c9a823ac8a7bfa83d36c7e78

Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org> # 3.18.x+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/link.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e3548f73bdea..728289c32b32 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -419,7 +419,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	struct cifs_io_parms io_parms;
 	int buf_type = CIFS_NO_BUFFER;
 	__le16 *utf16_path;
-	__u8 oplock = SMB2_OPLOCK_LEVEL_II;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct smb2_file_all_info *pfile_info = NULL;
 
 	oparms.tcon = tcon;
@@ -481,7 +481,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	struct cifs_io_parms io_parms;
 	int create_options = CREATE_NOT_DIR;
 	__le16 *utf16_path;
-	__u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct kvec iov[2];
 
 	if (backup_cred(cifs_sb))

From 685d7af55fcc6595aedce7bc91b1f5f25ff88301 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 19 Jul 2018 10:49:51 -0400
Subject: [PATCH 483/519] btrfs: don't leak ret from do_chunk_alloc

commit 4559b0a71749c442d34f7cfb9e72c9e58db83948 upstream.

If we're trying to make a data reservation and we have to allocate a
data chunk we could leak ret == 1, as do_chunk_alloc() will return 1 if
it allocated a chunk.  Since the end of the function is the success path
just return 0.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 982a9d509817..493c7354ec0b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4128,7 +4128,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
 				      data_sinfo->flags, bytes, 1);
 	spin_unlock(&data_sinfo->lock);
 
-	return ret;
+	return 0;
 }
 
 /*

From f4d76c934b4dcb6f068d2c850c35ebac5faff021 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Mon, 16 Jul 2018 10:38:57 +0200
Subject: [PATCH 484/519] s390/kvm: fix deadlock when killed by oom

commit 306d6c49ac9ded11114cb53b0925da52f2c2ada1 upstream.

When the oom killer kills a userspace process in the page fault handler
while in guest context, the fault handler fails to release the mm_sem
if the FAULT_FLAG_RETRY_NOWAIT option is set. This leads to a deadlock
when tearing down the mm when the process terminates. This bug can only
happen when pfault is enabled, so only KVM clients are affected.

The problem arises in the rare cases in which handle_mm_fault does not
release the mm_sem. This patch fixes the issue by manually releasing
the mm_sem when needed.

Fixes: 24eb3a824c4f3 ("KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault")
Cc: <stable@vger.kernel.org> # 3.15+
Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/mm/fault.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ec1a30d0d11a..7218689bd6ee 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -459,6 +459,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
 	/* No reason to continue if interrupted by SIGKILL. */
 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
 		fault = VM_FAULT_SIGNAL;
+		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+			goto out_up;
 		goto out;
 	}
 	if (unlikely(fault & VM_FAULT_ERROR))

From ac92782e4ef6fc9f4694bdc9e1b5d5bfb72027ff Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 1 Aug 2018 12:36:52 -0400
Subject: [PATCH 485/519] ext4: check for NUL characters in extended
 attribute's name

commit 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 upstream.

Extended attribute names are defined to be NUL-terminated, so the name
must not contain a NUL character.  This is important because there are
places when remove extended attribute, the code uses strlen to
determine the length of the entry.  That should probably be fixed at
some point, but code is currently really messy, so the simplest fix
for now is to simply validate that the extended attributes are sane.

https://bugzilla.kernel.org/show_bug.cgi?id=200401

Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c7cad05aed27..9fb2a751fce4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -197,6 +197,8 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
 		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
 		if ((void *)next >= end)
 			return -EFSCORRUPTED;
+		if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
+			return -EFSCORRUPTED;
 		e = next;
 	}
 

From bca4f76edcc74a0290fd007bf937217a8c43ee00 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:48:00 -0400
Subject: [PATCH 486/519] ext4: sysfs: print ext4_super_block fields as
 little-endian

commit a4d2aadca184ece182418950d45ba4ffc7b652d2 upstream.

While working on extended rand for last_error/first_error timestamps,
I noticed that the endianess is wrong; we access the little-endian
fields in struct ext4_super_block as native-endian when we print them.

This adds a special case in ext4_attr_show() and ext4_attr_store()
to byteswap the superblock fields if needed.

In older kernels, this code was part of super.c, it got moved to
sysfs.c in linux-4.4.

Cc: stable@vger.kernel.org
Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors")
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/sysfs.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index c2ee23acf359..ae9929d678d6 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -277,8 +277,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
 	case attr_pointer_ui:
 		if (!ptr)
 			return 0;
-		return snprintf(buf, PAGE_SIZE, "%u\n",
-				*((unsigned int *) ptr));
+		if (a->attr_ptr == ptr_ext4_super_block_offset)
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					le32_to_cpup(ptr));
+		else
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					*((unsigned int *) ptr));
 	case attr_pointer_atomic:
 		if (!ptr)
 			return 0;
@@ -311,7 +315,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 		ret = kstrtoul(skip_spaces(buf), 0, &t);
 		if (ret)
 			return ret;
-		*((unsigned int *) ptr) = t;
+		if (a->attr_ptr == ptr_ext4_super_block_offset)
+			*((__le32 *) ptr) = cpu_to_le32(t);
+		else
+			*((unsigned int *) ptr) = t;
 		return len;
 	case attr_inode_readahead:
 		return inode_readahead_blks_store(a, sbi, buf, len);

From d684b70576fe7afdfb5d943f30aaaeac1f84a5a2 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sun, 29 Jul 2018 17:13:42 -0400
Subject: [PATCH 487/519] ext4: reset error code in ext4_find_entry in fallback

commit f39b3f45dbcb0343822cce31ea7636ad66e60bc2 upstream.

When ext4_find_entry() falls back to "searching the old fashioned
way" due to a corrupt dx dir, it needs to reset the error code
to NULL so that the nonstandard ERR_BAD_DX_DIR code isn't returned
to userspace.

https://bugzilla.kernel.org/show_bug.cgi?id=199947

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@yandex.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/namei.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 97472088d65a..a1f1e53d0e25 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1401,6 +1401,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 			goto cleanup_and_exit;
 		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
 			       "falling back\n"));
+		ret = NULL;
 	}
 	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
 	if (!nblocks) {

From 355cccb6593f87329940d411894bd9de30286b97 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@android.com>
Date: Wed, 15 Aug 2018 12:51:21 -0700
Subject: [PATCH 488/519] arm64: mm: check for upper PAGE_SHIFT bits in
 pfn_valid()

commit 5ad356eabc47d26a92140a0c4b20eba471c10de3 upstream.

ARM64's pfn_valid() shifts away the upper PAGE_SHIFT bits of the input
before seeing if the PFN is valid.  This leads to false positives when
some of the upper bits are set, but the lower bits match a valid PFN.

For example, the following userspace code looks up a bogus entry in
/proc/kpageflags:

    int pagemap = open("/proc/self/pagemap", O_RDONLY);
    int pageflags = open("/proc/kpageflags", O_RDONLY);
    uint64_t pfn, val;

    lseek64(pagemap, [...], SEEK_SET);
    read(pagemap, &pfn, sizeof(pfn));
    if (pfn & (1UL << 63)) {        /* valid PFN */
        pfn &= ((1UL << 55) - 1);   /* clear flag bits */
        pfn |= (1UL << 55);
        lseek64(pageflags, pfn * sizeof(uint64_t), SEEK_SET);
        read(pageflags, &val, sizeof(val));
    }

On ARM64 this causes the userspace process to crash with SIGSEGV rather
than reading (1 << KPF_NOPAGE).  kpageflags_read() treats the offset as
valid, and stable_page_flags() will try to access an address between the
user and kernel address ranges.

Fixes: c1cc1552616d ("arm64: MMU initialisation")
Cc: stable@vger.kernel.org
Signed-off-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/mm/init.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index efd89ce4533d..adf4122502a9 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -120,7 +120,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 #ifdef CONFIG_HAVE_ARCH_PFN_VALID
 int pfn_valid(unsigned long pfn)
 {
-	return memblock_is_memory(pfn << PAGE_SHIFT);
+	phys_addr_t addr = pfn << PAGE_SHIFT;
+
+	if ((addr >> PAGE_SHIFT) != pfn)
+		return 0;
+	return memblock_is_memory(addr);
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif

From 24fa4a211e2655ba71f0da7e351475bb8f752e93 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Mon, 13 Aug 2018 11:43:51 +0100
Subject: [PATCH 489/519] KVM: arm/arm64: Skip updating PTE entry if no change

commit 976d34e2dab10ece5ea8fe7090b7692913f89084 upstream.

When there is contention on faulting in a particular page table entry
at stage 2, the break-before-make requirement of the architecture can
lead to additional refaulting due to TLB invalidation.

Avoid this by skipping a page table update if the new value of the PTE
matches the previous value.

Cc: stable@vger.kernel.org
Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup")
Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kvm/mmu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e8835d4e173c..cd1779bf6ef7 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -961,6 +961,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	/* Create 2nd stage page table mapping - Level 3 */
 	old_pte = *pte;
 	if (pte_present(old_pte)) {
+		/* Skip page table update if there is no change */
+		if (pte_val(old_pte) == pte_val(*new_pte))
+			return 0;
+
 		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {

From d839710da969f2f686802bcbbfb8164261b85833 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Mon, 13 Aug 2018 11:43:50 +0100
Subject: [PATCH 490/519] KVM: arm/arm64: Skip updating PMD entry if no change

commit 86658b819cd0a9aa584cd84453ed268a6f013770 upstream.

Contention on updating a PMD entry by a large number of vcpus can lead
to duplicate work when handling stage 2 page faults. As the page table
update follows the break-before-make requirement of the architecture,
it can lead to repeated refaults due to clearing the entry and
flushing the tlbs.

This problem is more likely when -

* there are large number of vcpus
* the mapping is large block mapping

such as when using PMD hugepages (512MB) with 64k pages.

Fix this by skipping the page table update if there is no change in
the entry being updated.

Cc: stable@vger.kernel.org
Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages")
Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kvm/mmu.c | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index cd1779bf6ef7..e0267532bd4e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -892,19 +892,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	VM_BUG_ON(!pmd);
 
-	/*
-	 * Mapping in huge pages should only happen through a fault.  If a
-	 * page is merged into a transparent huge page, the individual
-	 * subpages of that huge page should be unmapped through MMU
-	 * notifiers before we get here.
-	 *
-	 * Merging of CompoundPages is not supported; they should become
-	 * splitting first, unmapped, merged, and mapped back in on-demand.
-	 */
-	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
 	old_pmd = *pmd;
 	if (pmd_present(old_pmd)) {
+		/*
+		 * Multiple vcpus faulting on the same PMD entry, can
+		 * lead to them sequentially updating the PMD with the
+		 * same value. Following the break-before-make
+		 * (pmd_clear() followed by tlb_flush()) process can
+		 * hinder forward progress due to refaults generated
+		 * on missing translations.
+		 *
+		 * Skip updating the page table if the entry is
+		 * unchanged.
+		 */
+		if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+			return 0;
+
+		/*
+		 * Mapping in huge pages should only happen through a
+		 * fault.  If a page is merged into a transparent huge
+		 * page, the individual subpages of that huge page
+		 * should be unmapped through MMU notifiers before we
+		 * get here.
+		 *
+		 * Merging of CompoundPages is not supported; they
+		 * should become splitting first, unmapped, merged,
+		 * and mapped back in on-demand.
+		 */
+		VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
 		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {

From 6a56bd7f2ea31d4c86849b8f67d4e2dc1cb5b788 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 20 Aug 2018 11:58:35 +0200
Subject: [PATCH 491/519] x86/speculation/l1tf: Fix overflow in
 l1tf_pfn_limit() on 32bit

commit 9df9516940a61d29aedf4d91b483ca6597e7d480 upstream.

On 32bit PAE kernels on 64bit hardware with enough physical bits,
l1tf_pfn_limit() will overflow unsigned long. This in turn affects
max_swapfile_size() and can lead to swapon returning -EINVAL. This has been
observed in a 32bit guest with 42 bits physical address size, where
max_swapfile_size() overflows exactly to 1 << 32, thus zero, and produces
the following warning to dmesg:

[    6.396845] Truncating oversized swap area, only using 0k out of 2047996k

Fix this by using unsigned long long instead.

Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Reported-by: Dominique Leuenberger <dimstar@suse.de>
Reported-by: Adrian Schroeter <adrian@suse.de>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180820095835.5298-1-vbabka@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/processor.h | 4 ++--
 arch/x86/mm/init.c               | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a3a53955f01c..b023f127eee3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -172,9 +172,9 @@ extern const struct seq_operations cpuinfo_op;
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
-static inline unsigned long l1tf_pfn_limit(void)
+static inline unsigned long long l1tf_pfn_limit(void)
 {
-	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
 }
 
 extern void early_cpu_init(void);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4954a6cef50a..3bf0ff0d784a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -779,7 +779,7 @@ unsigned long max_swapfile_size(void)
 
 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
-		unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
+		unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
 		/*
 		 * We encode swap offsets also with 3 bits below those for pfn
 		 * which makes the usable limit higher.
@@ -787,7 +787,7 @@ unsigned long max_swapfile_size(void)
 #if CONFIG_PGTABLE_LEVELS > 2
 		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
 #endif
-		pages = min_t(unsigned long, l1tf_limit, pages);
+		pages = min_t(unsigned long long, l1tf_limit, pages);
 	}
 	return pages;
 }

From 7b69cd6fa088e473869512672969e6c490cac1b6 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 23 Aug 2018 15:44:18 +0200
Subject: [PATCH 492/519] x86/speculation/l1tf: Fix off-by-one error when
 warning that system has too much RAM

commit b0a182f875689647b014bc01d36b340217792852 upstream.

Two users have reported [1] that they have an "extremely unlikely" system
with more than MAX_PA/2 memory and L1TF mitigation is not effective. In
fact it's a CPU with 36bits phys limit (64GB) and 32GB memory, but due to
holes in the e820 map, the main region is almost 500MB over the 32GB limit:

[    0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000081effffff] usable

Suggestions to use 'mem=32G' to enable the L1TF mitigation while losing the
500MB revealed, that there's an off-by-one error in the check in
l1tf_select_mitigation().

l1tf_pfn_limit() returns the last usable pfn (inclusive) and the range
check in the mitigation path does not take this into account.

Instead of amending the range check, make l1tf_pfn_limit() return the first
PFN which is over the limit which is less error prone. Adjust the other
users accordingly.

[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536

Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Reported-by: George Anchev <studio@anchev.net>
Reported-by: Christopher Snowhill <kode54@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180823134418.17008-1-vbabka@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/processor.h | 2 +-
 arch/x86/mm/init.c               | 2 +-
 arch/x86/mm/mmap.c               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b023f127eee3..337c52192278 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -174,7 +174,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
 
 static inline unsigned long long l1tf_pfn_limit(void)
 {
-	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
 }
 
 extern void early_cpu_init(void);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3bf0ff0d784a..f00eb52c16a6 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -779,7 +779,7 @@ unsigned long max_swapfile_size(void)
 
 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
-		unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
+		unsigned long long l1tf_limit = l1tf_pfn_limit();
 		/*
 		 * We encode swap offsets also with 3 bits below those for pfn
 		 * which makes the usable limit higher.
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 9a055ea279eb..528d71b50c3b 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -138,7 +138,7 @@ bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
 	/* If it's real memory always allow */
 	if (pfn_valid(pfn))
 		return true;
-	if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+	if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
 		return false;
 	return true;
 }

From 72f6531162bd2f1b57e8114c8358fca507090f41 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 23 Aug 2018 16:21:29 +0200
Subject: [PATCH 493/519] x86/speculation/l1tf: Suggest what to do on systems
 with too much RAM

commit 6a012288d6906fee1dbc244050ade1dafe4a9c8d upstream.

Two users have reported [1] that they have an "extremely unlikely" system
with more than MAX_PA/2 memory and L1TF mitigation is not effective.

Make the warning more helpful by suggesting the proper mem=X kernel boot
parameter to make it effective and a link to the L1TF document to help
decide if the mitigation is worth the unusable RAM.

[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536

Suggested-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/966571f0-9d7f-43dc-92c6-a10eec7a1254@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 34e4aaaf03d2..b9e6b60df148 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -654,6 +654,10 @@ static void __init l1tf_select_mitigation(void)
 	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
 	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
 		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
+				half_pa);
+		pr_info("However, doing so will make a part of your RAM unusable.\n");
+		pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
 		return;
 	}
 

From dde9849882e1f53b8d96284b4736f95a49ea3eff Mon Sep 17 00:00:00 2001
From: Rian Hunter <rian@alum.mit.edu>
Date: Sun, 19 Aug 2018 16:08:53 -0700
Subject: [PATCH 494/519] x86/process: Re-export start_thread()

commit dc76803e57cc86589c4efcb5362918f9b0c0436f upstream.

The consolidation of the start_thread() functions removed the export
unintentionally. This breaks binfmt handlers built as a module.

Add it back.

Fixes: e634d8fc792c ("x86-64: merge the standard and compat start_thread() functions")
Signed-off-by: Rian Hunter <rian@alum.mit.edu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Dmitry Safonov <dima@arista.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180819230854.7275-1-rian@alum.mit.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/process_64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4cbb60fbff3e..c7cc81e9bb84 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -250,6 +250,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	start_thread_common(regs, new_ip, new_sp,
 			    __USER_CS, __USER_DS, 0);
 }
+EXPORT_SYMBOL_GPL(start_thread);
 
 #ifdef CONFIG_COMPAT
 void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)

From a8eaf0fc14c07029a74c7cf97e60fb07d3628226 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Tue, 17 Jul 2018 19:00:33 +0300
Subject: [PATCH 495/519] fuse: Don't access pipe->buffers without pipe_lock()

commit a2477b0e67c52f4364a47c3ad70902bc2a61bd4c upstream.

fuse_dev_splice_write() reads pipe->buffers to determine the size of
'bufs' array before taking the pipe_lock(). This is not safe as
another thread might change the 'pipe->buffers' between the allocation
and taking the pipe_lock(). So we end up with too small 'bufs' array.

Move the bufs allocations inside pipe_lock()/pipe_unlock() to fix this.

Fixes: dd3bb14f44a6 ("fuse: support splice() writing to fuse device")
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: <stable@vger.kernel.org> # v2.6.35
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d0cf1f010fbe..d6d8f8e8847a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1991,11 +1991,14 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 	if (!fud)
 		return -EPERM;
 
-	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
-	if (!bufs)
-		return -ENOMEM;
-
 	pipe_lock(pipe);
+
+	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
+	if (!bufs) {
+		pipe_unlock(pipe);
+		return -ENOMEM;
+	}
+
 	nbuf = 0;
 	rem = 0;
 	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)

From d14dbb5c6c4c068a2117fdd3ae73ade8d490f483 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 26 Jul 2018 16:13:11 +0200
Subject: [PATCH 496/519] fuse: fix double request_end()

commit 87114373ea507895a62afb10d2910bd9adac35a8 upstream.

Refcounting of request is broken when fuse_abort_conn() is called and
request is on the fpq->io list:

 - ref is taken too late
 - then it is not dropped

Fixes: 0d8e84b0432b ("fuse: simplify request abort")
Cc: <stable@vger.kernel.org> # v4.2
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d6d8f8e8847a..42a6263e7809 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -382,7 +382,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 	struct fuse_iqueue *fiq = &fc->iq;
 
 	if (test_and_set_bit(FR_FINISHED, &req->flags))
-		return;
+		goto out_put_req;
 
 	spin_lock(&fiq->waitq.lock);
 	list_del_init(&req->intr_entry);
@@ -412,6 +412,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 	wake_up(&req->waitq);
 	if (req->end)
 		req->end(fc, req);
+out_put_req:
 	fuse_put_request(fc, req);
 }
 
@@ -2154,6 +2155,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
 				set_bit(FR_ABORTED, &req->flags);
 				if (!test_bit(FR_LOCKED, &req->flags)) {
 					set_bit(FR_PRIVATE, &req->flags);
+					__fuse_get_request(req);
 					list_move(&req->list, &to_end1);
 				}
 				spin_unlock(&req->waitq.lock);
@@ -2180,7 +2182,6 @@ void fuse_abort_conn(struct fuse_conn *fc)
 
 		while (!list_empty(&to_end1)) {
 			req = list_first_entry(&to_end1, struct fuse_req, list);
-			__fuse_get_request(req);
 			list_del_init(&req->list);
 			request_end(fc, req);
 		}

From 4ded21c9ea1cf8687e33ebe0577f6b4353f5d8c6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 26 Jul 2018 16:13:11 +0200
Subject: [PATCH 497/519] fuse: fix unlocked access to processing queue

commit 45ff350bbd9d0f0977ff270a0d427c71520c0c37 upstream.

fuse_dev_release() assumes that it's the only one referencing the
fpq->processing list, but that's not true, since fuse_abort_conn() can be
doing the same without any serialization between the two.

Fixes: c3696046beb3 ("fuse: separate pqueue for clones")
Cc: <stable@vger.kernel.org> # v4.2
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 42a6263e7809..5fc7521e05a4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2199,9 +2199,15 @@ int fuse_dev_release(struct inode *inode, struct file *file)
 	if (fud) {
 		struct fuse_conn *fc = fud->fc;
 		struct fuse_pqueue *fpq = &fud->pq;
+		LIST_HEAD(to_end);
 
+		spin_lock(&fpq->lock);
 		WARN_ON(!list_empty(&fpq->io));
-		end_requests(fc, &fpq->processing);
+		list_splice_init(&fpq->processing, &to_end);
+		spin_unlock(&fpq->lock);
+
+		end_requests(fc, &to_end);
+
 		/* Are we the last open device? */
 		if (atomic_dec_and_test(&fc->dev_count)) {
 			WARN_ON(fc->iq.fasync != NULL);

From 4d6ef17a060cb227007a7dec6aa886abc4c01297 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 26 Jul 2018 16:13:11 +0200
Subject: [PATCH 498/519] fuse: umount should wait for all requests

commit b8f95e5d13f5f0191dcb4b9113113d241636e7cb upstream.

fuse_abort_conn() does not guarantee that all async requests have actually
finished aborting (i.e. their ->end() function is called).  This could
actually result in still used inodes after umount.

Add a helper to wait until all requests are fully done.  This is done by
looking at the "num_waiting" counter.  When this counter drops to zero, we
can be sure that no more requests are outstanding.

Fixes: 0d8e84b0432b ("fuse: simplify request abort")
Cc: <stable@vger.kernel.org> # v4.2
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c    | 23 +++++++++++++++++++----
 fs/fuse/fuse_i.h |  1 +
 fs/fuse/inode.c  |  2 ++
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5fc7521e05a4..2671e922c720 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -143,6 +143,16 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 	return !fc->initialized || (for_background && fc->blocked);
 }
 
+static void fuse_drop_waiting(struct fuse_conn *fc)
+{
+	if (fc->connected) {
+		atomic_dec(&fc->num_waiting);
+	} else if (atomic_dec_and_test(&fc->num_waiting)) {
+		/* wake up aborters */
+		wake_up_all(&fc->blocked_waitq);
+	}
+}
+
 static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 				       bool for_background)
 {
@@ -189,7 +199,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 	return req;
 
  out:
-	atomic_dec(&fc->num_waiting);
+	fuse_drop_waiting(fc);
 	return ERR_PTR(err);
 }
 
@@ -296,7 +306,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 
 		if (test_bit(FR_WAITING, &req->flags)) {
 			__clear_bit(FR_WAITING, &req->flags);
-			atomic_dec(&fc->num_waiting);
+			fuse_drop_waiting(fc);
 		}
 
 		if (req->stolen_file)
@@ -382,7 +392,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 	struct fuse_iqueue *fiq = &fc->iq;
 
 	if (test_and_set_bit(FR_FINISHED, &req->flags))
-		goto out_put_req;
+		goto put_request;
 
 	spin_lock(&fiq->waitq.lock);
 	list_del_init(&req->intr_entry);
@@ -412,7 +422,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 	wake_up(&req->waitq);
 	if (req->end)
 		req->end(fc, req);
-out_put_req:
+put_request:
 	fuse_put_request(fc, req);
 }
 
@@ -2192,6 +2202,11 @@ void fuse_abort_conn(struct fuse_conn *fc)
 }
 EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
+void fuse_wait_aborted(struct fuse_conn *fc)
+{
+	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
+}
+
 int fuse_dev_release(struct inode *inode, struct file *file)
 {
 	struct fuse_dev *fud = fuse_get_dev(file);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 604cd42dafef..7aafe9acc6c0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -842,6 +842,7 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
 
 /* Abort all requests */
 void fuse_abort_conn(struct fuse_conn *fc);
+void fuse_wait_aborted(struct fuse_conn *fc);
 
 /**
  * Invalidate inode attributes
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0b73e0c6d48..9517154ddd29 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -382,6 +382,8 @@ static void fuse_put_super(struct super_block *sb)
 	fuse_send_destroy(fc);
 
 	fuse_abort_conn(fc);
+	fuse_wait_aborted(fc);
+
 	mutex_lock(&fuse_mutex);
 	list_del(&fc->entry);
 	fuse_ctl_remove_conn(fc);

From c93ce9b00b9aa4bae06b758223ff62ca799522c6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 26 Jul 2018 16:13:11 +0200
Subject: [PATCH 499/519] fuse: Fix oops at process_init_reply()

commit e8f3bd773d22f488724dffb886a1618da85c2966 upstream.

syzbot is hitting NULL pointer dereference at process_init_reply().
This is because deactivate_locked_super() is called before response for
initial request is processed.

Fix this by aborting and waiting for all requests (including FUSE_INIT)
before resetting fc->sb.

Original patch by Tetsuo Handa <penguin-kernel@I-love.SKAURA.ne.jp>.

Reported-by: syzbot <syzbot+b62f08f4d5857755e3bc@syzkaller.appspotmail.com>
Fixes: e27c9d3877a0 ("fuse: fuse: add time_gran to INIT_OUT")
Cc: <stable@vger.kernel.org> # v3.19
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/inode.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9517154ddd29..4b2eb65be0d4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -379,11 +379,6 @@ static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
-	fuse_send_destroy(fc);
-
-	fuse_abort_conn(fc);
-	fuse_wait_aborted(fc);
-
 	mutex_lock(&fuse_mutex);
 	list_del(&fc->entry);
 	fuse_ctl_remove_conn(fc);
@@ -1174,16 +1169,25 @@ static struct dentry *fuse_mount(struct file_system_type *fs_type,
 	return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
 }
 
-static void fuse_kill_sb_anon(struct super_block *sb)
+static void fuse_sb_destroy(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
 	if (fc) {
+		fuse_send_destroy(fc);
+
+		fuse_abort_conn(fc);
+		fuse_wait_aborted(fc);
+
 		down_write(&fc->killsb);
 		fc->sb = NULL;
 		up_write(&fc->killsb);
 	}
+}
 
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+	fuse_sb_destroy(sb);
 	kill_anon_super(sb);
 }
 
@@ -1206,14 +1210,7 @@ static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
 
 static void fuse_kill_sb_blk(struct super_block *sb)
 {
-	struct fuse_conn *fc = get_fuse_conn_super(sb);
-
-	if (fc) {
-		down_write(&fc->killsb);
-		fc->sb = NULL;
-		up_write(&fc->killsb);
-	}
-
+	fuse_sb_destroy(sb);
 	kill_block_super(sb);
 }
 

From 84c7c9010234715c9cb9470b09f41ed12b0d2163 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 19 Jul 2018 15:49:39 +0300
Subject: [PATCH 500/519] fuse: Add missed unlock_page() to
 fuse_readpages_fill()

commit 109728ccc5933151c68d1106e4065478a487a323 upstream.

The above error path returns with page unlocked, so this place seems also
to behave the same.

Fixes: f8dbdf81821b ("fuse: rework fuse_readpages()")
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/file.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1a063cbfe503..8577f3ba6dc6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -879,6 +879,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
 	}
 
 	if (WARN_ON(req->num_pages >= req->max_pages)) {
+		unlock_page(page);
 		fuse_put_request(fc, req);
 		return -EIO;
 	}

From f56d71483ea87fb6a4cdfb642585d66b90cc8742 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 3 Jun 2018 16:40:55 +0200
Subject: [PATCH 501/519] udl-kms: change down_interruptible to down

commit 8456b99c16d193c4c3b7df305cf431e027f0189c upstream.

If we leave urbs around, it causes not only leak, but also memory
corruption. This patch fixes the function udl_free_urb_list, so that it
always waits for all urbs that are in progress.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/udl/udl_main.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 33dbfb2c4748..2d7d7e0b2a23 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -141,18 +141,13 @@ static void udl_free_urb_list(struct drm_device *dev)
 	struct list_head *node;
 	struct urb_node *unode;
 	struct urb *urb;
-	int ret;
 	unsigned long flags;
 
 	DRM_DEBUG("Waiting for completes and freeing all render urbs\n");
 
 	/* keep waiting and freeing, until we've got 'em all */
 	while (count--) {
-
-		/* Getting interrupted means a leak, but ok at shutdown*/
-		ret = down_interruptible(&udl->urbs.limit_sem);
-		if (ret)
-			break;
+		down(&udl->urbs.limit_sem);
 
 		spin_lock_irqsave(&udl->urbs.lock, flags);
 

From 377d23bc9d8467cebe2b34902d9726c2683bb355 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 3 Jun 2018 16:40:56 +0200
Subject: [PATCH 502/519] udl-kms: handle allocation failure

commit 542bb9788a1f485eb1a2229178f665d8ea166156 upstream.

Allocations larger than PAGE_ALLOC_COSTLY_ORDER are unreliable and they
may fail anytime. This patch fixes the udl kms driver so that when a large
alloactions fails, it tries to do multiple smaller allocations.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/udl/udl_main.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 2d7d7e0b2a23..30bfeb1b2512 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -171,17 +171,22 @@ static void udl_free_urb_list(struct drm_device *dev)
 static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size)
 {
 	struct udl_device *udl = dev->dev_private;
-	int i = 0;
 	struct urb *urb;
 	struct urb_node *unode;
 	char *buf;
+	size_t wanted_size = count * size;
 
 	spin_lock_init(&udl->urbs.lock);
 
+retry:
 	udl->urbs.size = size;
 	INIT_LIST_HEAD(&udl->urbs.list);
 
-	while (i < count) {
+	sema_init(&udl->urbs.limit_sem, 0);
+	udl->urbs.count = 0;
+	udl->urbs.available = 0;
+
+	while (udl->urbs.count * size < wanted_size) {
 		unode = kzalloc(sizeof(struct urb_node), GFP_KERNEL);
 		if (!unode)
 			break;
@@ -197,11 +202,16 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size)
 		}
 		unode->urb = urb;
 
-		buf = usb_alloc_coherent(udl->udev, MAX_TRANSFER, GFP_KERNEL,
+		buf = usb_alloc_coherent(udl->udev, size, GFP_KERNEL,
 					 &urb->transfer_dma);
 		if (!buf) {
 			kfree(unode);
 			usb_free_urb(urb);
+			if (size > PAGE_SIZE) {
+				size /= 2;
+				udl_free_urb_list(dev);
+				goto retry;
+			}
 			break;
 		}
 
@@ -212,16 +222,14 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size)
 
 		list_add_tail(&unode->entry, &udl->urbs.list);
 
-		i++;
+		up(&udl->urbs.limit_sem);
+		udl->urbs.count++;
+		udl->urbs.available++;
 	}
 
-	sema_init(&udl->urbs.limit_sem, i);
-	udl->urbs.count = i;
-	udl->urbs.available = i;
+	DRM_DEBUG("allocated %d %d byte urbs\n", udl->urbs.count, (int) size);
 
-	DRM_DEBUG("allocated %d %d byte urbs\n", i, (int) size);
-
-	return i;
+	return udl->urbs.count;
 }
 
 struct urb *udl_get_urb(struct drm_device *dev)

From 7030b7046e187ce85e59cc61e10d6c18ecce7104 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 3 Jun 2018 16:40:57 +0200
Subject: [PATCH 503/519] udl-kms: fix crash due to uninitialized memory

commit 09a00abe3a9941c2715ca83eb88172cd2f54d8fd upstream.

We must use kzalloc when allocating the fb_deferred_io structure.
Otherwise, the field first_io is undefined and it causes a crash.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/udl/udl_fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 29bd801f5dad..0c648efd9a58 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -341,7 +341,7 @@ static int udl_fb_open(struct fb_info *info, int user)
 
 		struct fb_deferred_io *fbdefio;
 
-		fbdefio = kmalloc(sizeof(struct fb_deferred_io), GFP_KERNEL);
+		fbdefio = kzalloc(sizeof(struct fb_deferred_io), GFP_KERNEL);
 
 		if (fbdefio) {
 			fbdefio->delay = DL_DEFIO_WRITE_DELAY;

From b81b8594d6a2ed81025df895139bae5bd7231295 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 27 Jun 2018 17:36:38 +0200
Subject: [PATCH 504/519] ASoC: dpcm: don't merge format from invalid codec dai

commit 4febced15ac8ddb9cf3e603edb111842e4863d9a upstream.

When merging codec formats, dpcm_runtime_base_format() should skip
the codecs which are not supporting the current stream direction.

At the moment, if a BE link has more than one codec, and only one
of these codecs has no capture DAI, it becomes impossible to start
a capture stream because the merged format would be 0.

Skipping invalid codec DAI solves the problem.

Fixes: b073ed4e2126 ("ASoC: soc-pcm: DPCM cares BE format")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-pcm.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 43b80db952d1..f99eb8f44282 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1570,6 +1570,14 @@ static u64 dpcm_runtime_base_format(struct snd_pcm_substream *substream)
 		int i;
 
 		for (i = 0; i < be->num_codecs; i++) {
+			/*
+			 * Skip CODECs which don't support the current stream
+			 * type. See soc_pcm_init_runtime_hw() for more details
+			 */
+			if (!snd_soc_dai_stream_valid(be->codec_dais[i],
+						      stream))
+				continue;
+
 			codec_dai_drv = be->codec_dais[i]->driver;
 			if (stream == SNDRV_PCM_STREAM_PLAYBACK)
 				codec_stream = &codec_dai_drv->playback;

From 3e053c356194988b1e190c5db71f3fc97bb24e9f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Thu, 26 Jul 2018 15:49:10 -0500
Subject: [PATCH 505/519] ASoC: sirf: Fix potential NULL pointer dereference

commit ae1c696a480c67c45fb23b35162183f72c6be0e1 upstream.

There is a potential execution path in which function
platform_get_resource() returns NULL. If this happens,
we will end up having a NULL pointer dereference.

Fix this by replacing devm_ioremap with devm_ioremap_resource,
which has the NULL check and the memory region request.

This code was detected with the help of Coccinelle.

Cc: stable@vger.kernel.org
Fixes: 2bd8d1d5cf89 ("ASoC: sirf: Add audio usp interface driver")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/sirf/sirf-usp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sound/soc/sirf/sirf-usp.c b/sound/soc/sirf/sirf-usp.c
index 45fc06c0e0e5..6b504f407079 100644
--- a/sound/soc/sirf/sirf-usp.c
+++ b/sound/soc/sirf/sirf-usp.c
@@ -367,10 +367,9 @@ static int sirf_usp_pcm_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, usp);
 
 	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap(&pdev->dev, mem_res->start,
-		resource_size(mem_res));
-	if (base == NULL)
-		return -ENOMEM;
+	base = devm_ioremap_resource(&pdev->dev, mem_res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 	usp->regmap = devm_regmap_init_mmio(&pdev->dev, base,
 					    &sirf_usp_regmap_config);
 	if (IS_ERR(usp->regmap))

From 738cfe4d2de29c5f1da59a057153bf016db89243 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 13 Jul 2018 17:55:15 +0300
Subject: [PATCH 506/519] pinctrl: freescale: off by one in
 imx1_pinconf_group_dbg_show()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 19da44cd33a3a6ff7c97fff0189999ff15b241e4 upstream.

The info->groups[] array is allocated in imx1_pinctrl_parse_dt().  It
has info->ngroups elements.  Thus the > here should be >= to prevent
reading one element beyond the end of the array.

Cc: stable@vger.kernel.org
Fixes: 30612cd90005 ("pinctrl: imx1 core driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Uwe Kleine-König <u.kleine-könig@pengutronix.de>
Acked-by: Dong Aisheng <Aisheng.dong@nxp.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/freescale/pinctrl-imx1-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
index acaf84cadca3..6c9420ee9e03 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
@@ -434,7 +434,7 @@ static void imx1_pinconf_group_dbg_show(struct pinctrl_dev *pctldev,
 	const char *name;
 	int i, ret;
 
-	if (group > info->ngroups)
+	if (group >= info->ngroups)
 		return;
 
 	seq_puts(s, "\n");

From ed9c4055689cab0908fed0214dc0844babb83a6f Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 27 Aug 2018 14:40:09 -0700
Subject: [PATCH 507/519] x86/irqflags: Mark native_restore_fl extern inline

commit 1f59a4581b5ecfe9b4f049a7a2cf904d8352842d upstream.

This should have been marked extern inline in order to pick up the out
of line definition in arch/x86/kernel/irqflags.S.

Fixes: 208cbb325589 ("x86/irqflags: Provide a declaration for native_save_fl")
Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180827214011.55428-1-ndesaulniers@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/irqflags.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index cb7f04981c6b..8afbdcd3032b 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -28,7 +28,8 @@ extern inline unsigned long native_save_fl(void)
 	return flags;
 }
 
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
 {
 	asm volatile("push %0 ; popf"
 		     : /* no output */

From 2edb10cbf21fca9b220a2bdf0b87b7bbeaf1e1e9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 24 Aug 2018 10:03:51 -0700
Subject: [PATCH 508/519] x86/spectre: Add missing family 6 check to microcode
 check

commit 1ab534e85c93945f7862378d8c8adcf408205b19 upstream.

The check for Spectre microcodes does not check for family 6, only the
model numbers.

Add a family 6 check to avoid ambiguity with other families.

Fixes: a5b296636453 ("x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes")
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180824170351.34874-2-andi@firstfloor.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 4dce22d3cb06..b18fe3d245fe 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -74,6 +74,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
 		return false;
 
+	if (c->x86 != 6)
+		return false;
+
 	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
 		if (c->x86_model == spectre_bad_microcodes[i].model &&
 		    c->x86_mask == spectre_bad_microcodes[i].stepping)

From 2502064731b133b87540758678b4485c0dbdc925 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 6 Aug 2018 14:26:39 +0200
Subject: [PATCH 509/519] s390: fix br_r1_trampoline for machines without exrl

commit 26f843848bae973817b3587780ce6b7b0200d3e4 upstream.

For machines without the exrl instruction the BFP jit generates
code that uses an "br %r1" instruction located in the lowcore page.
Unfortunately there is a cut & paste error that puts an additional
"larl %r1,.+14" instruction in the code that clobbers the branch
target address in %r1. Remove the larl instruction.

Cc: <stable@vger.kernel.org> # v4.17+
Fixes: de5cb6eb51 ("s390: use expoline thunks in the BPF JIT")
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/net/bpf_jit_comp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index a26528afceb2..727693e283da 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -522,8 +522,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 			/* br %r1 */
 			_EMIT2(0x07f1);
 		} else {
-			/* larl %r1,.+14 */
-			EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
 			/* ex 0,S390_lowcore.br_r1_tampoline */
 			EMIT4_DISP(0x44000000, REG_0, REG_0,
 				   offsetof(struct _lowcore, br_r1_trampoline));

From b629d4650a19d883a594a023603894ba9870ab91 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 16 May 2018 09:37:25 +0200
Subject: [PATCH 510/519] s390/qdio: reset old sbal_state flags

commit 64e03ff72623b8c2ea89ca3cb660094e019ed4ae upstream.

When allocating a new AOB fails, handle_outbound() is still capable of
transmitting the selected buffer (just without async completion).

But if a previous transfer on this queue slot used async completion, its
sbal_state flags field is still set to QDIO_OUTBUF_STATE_FLAG_PENDING.
So when the upper layer driver sees this stale flag, it expects an async
completion that never happens.

Fix this by unconditionally clearing the flags field.

Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks")
Cc: <stable@vger.kernel.org> #v3.2+
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/qdio.h | 1 -
 drivers/s390/cio/qdio_main.c | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 998b61cd0e56..4b39ba700d32 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -261,7 +261,6 @@ struct qdio_outbuf_state {
 	void *user;
 };
 
-#define QDIO_OUTBUF_STATE_FLAG_NONE	0x00
 #define QDIO_OUTBUF_STATE_FLAG_PENDING	0x01
 
 #define CHSC_AC1_INITIATE_INPUTQ	0x80
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 742ca57ece8c..d64b401f3d05 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -640,21 +640,20 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q,
 	unsigned long phys_aob = 0;
 
 	if (!q->use_cq)
-		goto out;
+		return 0;
 
 	if (!q->aobs[bufnr]) {
 		struct qaob *aob = qdio_allocate_aob();
 		q->aobs[bufnr] = aob;
 	}
 	if (q->aobs[bufnr]) {
-		q->sbal_state[bufnr].flags = QDIO_OUTBUF_STATE_FLAG_NONE;
 		q->sbal_state[bufnr].aob = q->aobs[bufnr];
 		q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user;
 		phys_aob = virt_to_phys(q->aobs[bufnr]);
 		WARN_ON_ONCE(phys_aob & 0xFF);
 	}
 
-out:
+	q->sbal_state[bufnr].flags = 0;
 	return phys_aob;
 }
 

From 5169397d50ed4ca70d273022cc7d8d8f7f68966e Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.ibm.com>
Date: Mon, 13 Aug 2018 11:26:46 +0200
Subject: [PATCH 511/519] s390/pci: fix out of bounds access during irq setup

commit 866f3576a72b2233a76dffb80290f8086dc49e17 upstream.

During interrupt setup we allocate interrupt vectors, walk the list of msi
descriptors, and fill in the message data. Requesting more interrupts than
supported on s390 can lead to an out of bounds access.

When we restrict the number of interrupts we should also stop walking the
msi list after all supported interrupts are handled.

Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Ott <sebott@linux.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/pci/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index ef0499b76c50..9a5754d4ee87 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -412,6 +412,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	hwirq = 0;
 	for_each_pci_msi_entry(msi, pdev) {
 		rc = -EIO;
+		if (hwirq >= msi_vecs)
+			break;
 		irq = irq_alloc_desc(0);	/* Alloc irq on node 0 */
 		if (irq < 0)
 			goto out_msi;

From 3051451d49dbe177b85f1f1b8fd57ecd7dc87f21 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 28 Apr 2018 21:35:01 +0900
Subject: [PATCH 512/519] kprobes: Make list and blacklist root user read only

commit f2a3ab36077222437b4826fc76111caa14562b7c upstream.

Since the blacklist and list files on debugfs indicates
a sensitive address information to reader, it should be
restricted to the root user.

Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Howells <dhowells@redhat.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jon Medhurst <tixy@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tobin C . Harding <me@tobin.cc>
Cc: Will Deacon <will.deacon@arm.com>
Cc: acme@kernel.org
Cc: akpm@linux-foundation.org
Cc: brueckner@linux.vnet.ibm.com
Cc: linux-arch@vger.kernel.org
Cc: rostedt@goodmis.org
Cc: schwidefsky@de.ibm.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/lkml/152491890171.9916.5183693615601334087.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/kprobes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index bbe9dd0886bd..388bcace62f8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2441,7 +2441,7 @@ static int __init debugfs_kprobe_init(void)
 	if (!dir)
 		return -ENOMEM;
 
-	file = debugfs_create_file("list", 0444, dir, NULL,
+	file = debugfs_create_file("list", 0400, dir, NULL,
 				&debugfs_kprobes_operations);
 	if (!file)
 		goto error;
@@ -2451,7 +2451,7 @@ static int __init debugfs_kprobe_init(void)
 	if (!file)
 		goto error;
 
-	file = debugfs_create_file("blacklist", 0444, dir, NULL,
+	file = debugfs_create_file("blacklist", 0400, dir, NULL,
 				&debugfs_kprobe_blacklist_ops);
 	if (!file)
 		goto error;

From 79ffdc486e1eb4316d34394dbc611e693a35401a Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Tue, 15 May 2018 23:33:26 +0100
Subject: [PATCH 513/519] MIPS: Correct the 64-bit DSP accumulator register
 size

commit f5958b4cf4fc38ed4583ab83fb7c4cd1ab05f47b upstream.

Use the `unsigned long' rather than `__u32' type for DSP accumulator
registers, like with the regular MIPS multiply/divide accumulator and
general-purpose registers, as all are 64-bit in 64-bit implementations
and using a 32-bit data type leads to contents truncation on context
saving.

Update `arch_ptrace' and `compat_arch_ptrace' accordingly, removing
casts that are similarly not used with multiply/divide accumulator or
general-purpose register accesses.

Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.")
Patchwork: https://patchwork.linux-mips.org/patch/19329/
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # 2.6.15+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/processor.h | 2 +-
 arch/mips/kernel/ptrace.c         | 2 +-
 arch/mips/kernel/ptrace32.c       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 041153f5cf93..41a8201d572e 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -131,7 +131,7 @@ struct mips_fpu_struct {
 
 #define NUM_DSP_REGS   6
 
-typedef __u32 dspreg_t;
+typedef unsigned long dspreg_t;
 
 struct mips_dsp_state {
 	dspreg_t	dspr[NUM_DSP_REGS];
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 32fa3ae1a0a6..5a869515b393 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -879,7 +879,7 @@ long arch_ptrace(struct task_struct *child, long request,
 				goto out;
 			}
 			dregs = __get_dsp_regs(child);
-			tmp = (unsigned long) (dregs[addr - DSP_BASE]);
+			tmp = dregs[addr - DSP_BASE];
 			break;
 		}
 		case DSP_CONTROL:
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index d95117e71f69..286ec2d24d47 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -140,7 +140,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 				goto out;
 			}
 			dregs = __get_dsp_regs(child);
-			tmp = (unsigned long) (dregs[addr - DSP_BASE]);
+			tmp = dregs[addr - DSP_BASE];
 			break;
 		}
 		case DSP_CONTROL:

From bb190acea829443e0be1cd6dc87837df03b39083 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Tue, 21 Aug 2018 12:12:59 -0700
Subject: [PATCH 514/519] MIPS: lib: Provide MIPS64r6 __multi3() for GCC < 7

commit 690d9163bf4b8563a2682e619f938e6a0443947f upstream.

Some versions of GCC suboptimally generate calls to the __multi3()
intrinsic for MIPS64r6 builds, resulting in link failures due to the
missing function:

    LD      vmlinux.o
    MODPOST vmlinux.o
  kernel/bpf/verifier.o: In function `kmalloc_array':
  include/linux/slab.h:631: undefined reference to `__multi3'
  fs/select.o: In function `kmalloc_array':
  include/linux/slab.h:631: undefined reference to `__multi3'
  ...

We already have a workaround for this in which we provide the
instrinsic, but we do so selectively for GCC 7 only. Unfortunately the
issue occurs with older GCC versions too - it has been observed with
both GCC 5.4.0 & GCC 6.4.0.

MIPSr6 support was introduced in GCC 5, so all major GCC versions prior
to GCC 8 are affected and we extend our workaround accordingly to all
MIPS64r6 builds using GCC versions older than GCC 8.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
Fixes: ebabcf17bcd7 ("MIPS: Implement __multi3 for GCC7 MIPS64r6 builds")
Patchwork: https://patchwork.linux-mips.org/patch/20297/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 4.15+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/lib/multi3.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c
index 111ad475aa0c..4c2483f410c2 100644
--- a/arch/mips/lib/multi3.c
+++ b/arch/mips/lib/multi3.c
@@ -4,12 +4,12 @@
 #include "libgcc.h"
 
 /*
- * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
- * specific case only we'll implement it here.
+ * GCC 7 & older can suboptimally generate __multi3 calls for mips64r6, so for
+ * that specific case only we implement that intrinsic here.
  *
  * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
  */
-#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 8)
 
 /* multiply 64-bit values, low 64-bits returned */
 static inline long long notrace dmulu(long long a, long long b)

From 461a6385e58e8247e6ba2005aa5d1b8d980ee4a2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:51:40 -0700
Subject: [PATCH 515/519] scsi: sysfs: Introduce
 sysfs_{un,}break_active_protection()

commit 2afc9166f79b8f6da5f347f48515215ceee4ae37 upstream.

Introduce these two functions and export them such that the next patch
can add calls to these functions from the SCSI core.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/file.c       | 44 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/sysfs.h | 14 ++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 39c75a86c67f..666986b95c5d 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -407,6 +407,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
+/**
+ * sysfs_break_active_protection - break "active" protection
+ * @kobj: The kernel object @attr is associated with.
+ * @attr: The attribute to break the "active" protection for.
+ *
+ * With sysfs, just like kernfs, deletion of an attribute is postponed until
+ * all active .show() and .store() callbacks have finished unless this function
+ * is called. Hence this function is useful in methods that implement self
+ * deletion.
+ */
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr)
+{
+	struct kernfs_node *kn;
+
+	kobject_get(kobj);
+	kn = kernfs_find_and_get(kobj->sd, attr->name);
+	if (kn)
+		kernfs_break_active_protection(kn);
+	return kn;
+}
+EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
+
+/**
+ * sysfs_unbreak_active_protection - restore "active" protection
+ * @kn: Pointer returned by sysfs_break_active_protection().
+ *
+ * Undo the effects of sysfs_break_active_protection(). Since this function
+ * calls kernfs_put() on the kernfs node that corresponds to the 'attr'
+ * argument passed to sysfs_break_active_protection() that attribute may have
+ * been removed between the sysfs_break_active_protection() and
+ * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after
+ * this function has returned.
+ */
+void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+	struct kobject *kobj = kn->parent->priv;
+
+	kernfs_unbreak_active_protection(kn);
+	kernfs_put(kn);
+	kobject_put(kobj);
+}
+EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection);
+
 /**
  * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
  * @kobj: object we're acting for
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 00a1f330f93a..d3c19f8c4564 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -238,6 +238,9 @@ int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
 				  const struct attribute *attr, umode_t mode);
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr);
+void sysfs_unbreak_active_protection(struct kernfs_node *kn);
 void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
 			  const void *ns);
 bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr);
@@ -351,6 +354,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj,
 	return 0;
 }
 
+static inline struct kernfs_node *
+sysfs_break_active_protection(struct kobject *kobj,
+			      const struct attribute *attr)
+{
+	return NULL;
+}
+
+static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+}
+
 static inline void sysfs_remove_file_ns(struct kobject *kobj,
 					const struct attribute *attr,
 					const void *ns)

From a49097fb388388cc4b2bf8d3f42693eb594b2484 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:51:41 -0700
Subject: [PATCH 516/519] scsi: core: Avoid that SCSI device removal through
 sysfs triggers a deadlock

commit 0ee223b2e1f67cb2de9c0e3247c510d846e74d63 upstream.

A long time ago the unfortunate decision was taken to add a self-deletion
attribute to the sysfs SCSI device directory. That decision was unfortunate
because self-deletion is really tricky. We can't drop that attribute
because widely used user space software depends on it, namely the
rescan-scsi-bus.sh script. Hence this patch that avoids that writing into
that attribute triggers a deadlock. See also commit 7973cbd9fbd9 ("[PATCH]
add sysfs attributes to scan and delete scsi_devices").

This patch avoids that self-removal triggers the following deadlock:

======================================================
WARNING: possible circular locking dependency detected
4.18.0-rc2-dbg+ #5 Not tainted
------------------------------------------------------
modprobe/6539 is trying to acquire lock:
000000008323c4cd (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x45/0x90

but task is already holding lock:
00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&shost->scan_mutex){+.+.}:
       __mutex_lock+0xfe/0xc70
       mutex_lock_nested+0x1b/0x20
       scsi_remove_device+0x26/0x40 [scsi_mod]
       sdev_store_delete+0x27/0x30 [scsi_mod]
       dev_attr_store+0x3e/0x50
       sysfs_kf_write+0x87/0xa0
       kernfs_fop_write+0x190/0x230
       __vfs_write+0xd2/0x3b0
       vfs_write+0x101/0x270
       ksys_write+0xab/0x120
       __x64_sys_write+0x43/0x50
       do_syscall_64+0x77/0x230
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> #0 (kn->count#202){++++}:
       lock_acquire+0xd2/0x260
       __kernfs_remove+0x424/0x4a0
       kernfs_remove_by_name_ns+0x45/0x90
       remove_files.isra.1+0x3a/0x90
       sysfs_remove_group+0x5c/0xc0
       sysfs_remove_groups+0x39/0x60
       device_remove_attrs+0x82/0xb0
       device_del+0x251/0x580
       __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
       scsi_forget_host+0x37/0xb0 [scsi_mod]
       scsi_remove_host+0x9b/0x150 [scsi_mod]
       sdebug_driver_remove+0x4b/0x150 [scsi_debug]
       device_release_driver_internal+0x241/0x360
       device_release_driver+0x12/0x20
       bus_remove_device+0x1bc/0x290
       device_del+0x259/0x580
       device_unregister+0x1a/0x70
       sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
       scsi_debug_exit+0x76/0xe8 [scsi_debug]
       __x64_sys_delete_module+0x1c1/0x280
       do_syscall_64+0x77/0x230
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&shost->scan_mutex);
                               lock(kn->count#202);
                               lock(&shost->scan_mutex);
  lock(kn->count#202);

 *** DEADLOCK ***

2 locks held by modprobe/6539:
 #0: 00000000efaf9298 (&dev->mutex){....}, at: device_release_driver_internal+0x68/0x360
 #1: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod]

stack backtrace:
CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0xa4/0xf5
 print_circular_bug.isra.34+0x213/0x221
 __lock_acquire+0x1a7e/0x1b50
 lock_acquire+0xd2/0x260
 __kernfs_remove+0x424/0x4a0
 kernfs_remove_by_name_ns+0x45/0x90
 remove_files.isra.1+0x3a/0x90
 sysfs_remove_group+0x5c/0xc0
 sysfs_remove_groups+0x39/0x60
 device_remove_attrs+0x82/0xb0
 device_del+0x251/0x580
 __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
 scsi_forget_host+0x37/0xb0 [scsi_mod]
 scsi_remove_host+0x9b/0x150 [scsi_mod]
 sdebug_driver_remove+0x4b/0x150 [scsi_debug]
 device_release_driver_internal+0x241/0x360
 device_release_driver+0x12/0x20
 bus_remove_device+0x1bc/0x290
 device_del+0x259/0x580
 device_unregister+0x1a/0x70
 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
 scsi_debug_exit+0x76/0xe8 [scsi_debug]
 __x64_sys_delete_module+0x1c1/0x280
 do_syscall_64+0x77/0x230
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html.

Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of device_schedule_callback()")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_sysfs.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 4477e999ec70..8db0c48943d6 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -678,8 +678,24 @@ static ssize_t
 sdev_store_delete(struct device *dev, struct device_attribute *attr,
 		  const char *buf, size_t count)
 {
-	if (device_remove_file_self(dev, attr))
-		scsi_remove_device(to_scsi_device(dev));
+	struct kernfs_node *kn;
+
+	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+	WARN_ON_ONCE(!kn);
+	/*
+	 * Concurrent writes into the "delete" sysfs attribute may trigger
+	 * concurrent calls to device_remove_file() and scsi_remove_device().
+	 * device_remove_file() handles concurrent removal calls by
+	 * serializing these and by ignoring the second and later removal
+	 * attempts.  Concurrent calls of scsi_remove_device() are
+	 * serialized. The second and later calls of scsi_remove_device() are
+	 * ignored because the first call of that function changes the device
+	 * state into SDEV_DEL.
+	 */
+	device_remove_file(dev, attr);
+	scsi_remove_device(to_scsi_device(dev));
+	if (kn)
+		sysfs_unbreak_active_protection(kn);
 	return count;
 };
 static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);

From aba1ba97e64f2e6971df812fc45985222356cdd5 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 26 Jul 2018 12:13:49 -0500
Subject: [PATCH 517/519] iscsi target: fix session creation failure handling

commit 26abc916a898d34c5ad159315a2f683def3c5555 upstream.

The problem is that iscsi_login_zero_tsih_s1 sets conn->sess early in
iscsi_login_set_conn_values. If the function fails later like when we
alloc the idr it does kfree(sess) and leaves the conn->sess pointer set.
iscsi_login_zero_tsih_s1 then returns -Exyz and we then call
iscsi_target_login_sess_out and access the freed memory.

This patch has iscsi_login_zero_tsih_s1 either completely setup the
session or completely tear it down, so later in
iscsi_target_login_sess_out we can just check for it being set to the
connection.

Cc: stable@vger.kernel.org
Fixes: 0957627a9960 ("iscsi-target: Fix sess allocation leak in...")
Signed-off-by: Mike Christie <mchristi@redhat.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/target/iscsi/iscsi_target_login.c | 35 ++++++++++++++---------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index bc2cbffec27e..63e54beed196 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -323,8 +323,7 @@ static int iscsi_login_zero_tsih_s1(
 		pr_err("idr_alloc() for sess_idr failed\n");
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
-		kfree(sess);
-		return -ENOMEM;
+		goto free_sess;
 	}
 
 	sess->creation_time = get_jiffies_64();
@@ -340,20 +339,28 @@ static int iscsi_login_zero_tsih_s1(
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
 		pr_err("Unable to allocate memory for"
 				" struct iscsi_sess_ops.\n");
-		kfree(sess);
-		return -ENOMEM;
+		goto remove_idr;
 	}
 
 	sess->se_sess = transport_init_session(TARGET_PROT_NORMAL);
 	if (IS_ERR(sess->se_sess)) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
-		kfree(sess->sess_ops);
-		kfree(sess);
-		return -ENOMEM;
+		goto free_ops;
 	}
 
 	return 0;
+
+free_ops:
+	kfree(sess->sess_ops);
+remove_idr:
+	spin_lock_bh(&sess_idr_lock);
+	idr_remove(&sess_idr, sess->session_index);
+	spin_unlock_bh(&sess_idr_lock);
+free_sess:
+	kfree(sess);
+	conn->sess = NULL;
+	return -ENOMEM;
 }
 
 static int iscsi_login_zero_tsih_s2(
@@ -1142,13 +1149,13 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn,
 				   ISCSI_LOGIN_STATUS_INIT_ERR);
 	if (!zero_tsih || !conn->sess)
 		goto old_sess_out;
-	if (conn->sess->se_sess)
-		transport_free_session(conn->sess->se_sess);
-	if (conn->sess->session_index != 0) {
-		spin_lock_bh(&sess_idr_lock);
-		idr_remove(&sess_idr, conn->sess->session_index);
-		spin_unlock_bh(&sess_idr_lock);
-	}
+
+	transport_free_session(conn->sess->se_sess);
+
+	spin_lock_bh(&sess_idr_lock);
+	idr_remove(&sess_idr, conn->sess->session_index);
+	spin_unlock_bh(&sess_idr_lock);
+
 	kfree(conn->sess->sess_ops);
 	kfree(conn->sess);
 	conn->sess = NULL;

From eb08a285899df82056f712421988c6ccd0b58ba6 Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Thu, 26 Apr 2018 11:51:08 -0600
Subject: [PATCH 518/519] cdrom: Fix info leak/OOB read in
 cdrom_ioctl_drive_status

commit 8f3fafc9c2f0ece10832c25f7ffcb07c97a32ad4 upstream.

Like d88b6d04: "cdrom: information leak in cdrom_ioctl_media_changed()"

There is another cast from unsigned long to int which causes
a bounds check to fail with specially crafted input. The value is
then used as an index in the slot array in cdrom_slot_status().

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Scott Bauer <sbauer@plzdonthack.me>
Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/cdrom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 0151039bff05..1012b2cb6a16 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2526,7 +2526,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi,
 	if (!CDROM_CAN(CDC_SELECT_DISC) ||
 	    (arg == CDSL_CURRENT || arg == CDSL_NONE))
 		return cdi->ops->drive_status(cdi, CDSL_CURRENT);
-	if (((int)arg >= cdi->capacity))
+	if (arg >= cdi->capacity)
 		return -EINVAL;
 	return cdrom_slot_status(cdi, arg);
 }

From 4d9339a1f0e6e288903d34020f2378d250ea3cac Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 5 Sep 2018 09:18:41 +0200
Subject: [PATCH 519/519] Linux 4.4.154

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 208a813be615..b184286cf7e6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 153
+SUBLEVEL = 154
 EXTRAVERSION =
 NAME = Blurry Fish Butt