From 772de7be5b75e33a02d4ab79f0876fb868cf4242 Mon Sep 17 00:00:00 2001
From: Tim Murray <timmurray@google.com>
Date: Tue, 19 Jan 2016 16:36:40 -0800
Subject: [PATCH 001/797] ANDROID: mmc: move to a SCHED_FIFO thread

(cherry picked from commit 011e507b413393eab8279dac8b778ad9b6e9971b)

Running mmcqd as a prio 120 thread forces it to compete with standard
user processes for IO performance, especially when the system is under
severe CPU load. Move it to a SCHED_FIFO thread to reduce the impact of
load on IO performance.

Signed-off-by: Tim Murray <timmurray@google.com>
Bug: 25392275
Change-Id: I1edfe73baa25e181367c30c1f40fee886e92b60d
---
 drivers/mmc/card/queue.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 6f4323c6d653..6a4cd2bb4629 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -19,6 +19,7 @@
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
+#include <linux/sched/rt.h>
 #include "queue.h"
 
 #define MMC_QUEUE_BOUNCESZ	65536
@@ -50,6 +51,11 @@ static int mmc_queue_thread(void *d)
 {
 	struct mmc_queue *mq = d;
 	struct request_queue *q = mq->queue;
+	struct sched_param scheduler_params = {0};
+
+	scheduler_params.sched_priority = 1;
+
+	sched_setscheduler(current, SCHED_FIFO, &scheduler_params);
 
 	current->flags |= PF_MEMALLOC;
 

From c84db235f0c9db4574760979382285fdd50ef88b Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Wed, 24 Feb 2016 11:17:02 -0800
Subject: [PATCH 002/797] Revert "mmc: core: Hold a wake lock accross delayed
 work + mmc rescan"

The patch "mmc: core: Signal wakeup event at card insert/removal"
already provides a wake lock for mmc_detect_change(), so the wake lock
added by the reverted commit is redundant.

This reverts commit bec7bcbb707d10b80d450f6f02384efeff294799.
---
 drivers/mmc/core/core.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 95fba49d2d42..96666984a103 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -29,7 +29,6 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/wakelock.h>
 
 #include <trace/events/mmc.h>
 
@@ -59,7 +58,6 @@
 #define MMC_BKOPS_MAX_TIMEOUT	(4 * 60 * 1000) /* max time to wait in ms */
 
 static struct workqueue_struct *workqueue;
-static struct wake_lock mmc_delayed_work_wake_lock;
 static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
 
 /*
@@ -76,7 +74,6 @@ module_param(use_spi_crc, bool, 0);
 static int mmc_schedule_delayed_work(struct delayed_work *work,
 				     unsigned long delay)
 {
-	wake_lock(&mmc_delayed_work_wake_lock);
 	return queue_delayed_work(workqueue, work, delay);
 }
 
@@ -2582,7 +2579,6 @@ void mmc_rescan(struct work_struct *work)
 	struct mmc_host *host =
 		container_of(work, struct mmc_host, detect.work);
 	int i;
-	bool extend_wakelock = false;
 
 	if (host->trigger_card_event && host->ops->card_event) {
 		host->ops->card_event(host);
@@ -2644,20 +2640,14 @@ void mmc_rescan(struct work_struct *work)
 
 	mmc_claim_host(host);
 	for (i = 0; i < ARRAY_SIZE(freqs); i++) {
-		if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min))) {
-			extend_wakelock = true;
+		if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min)))
 			break;
-		}
 		if (freqs[i] <= host->f_min)
 			break;
 	}
 	mmc_release_host(host);
 
  out:
-	if (extend_wakelock)
-		wake_lock_timeout(&mmc_delayed_work_wake_lock, HZ / 2);
-	else
-		wake_unlock(&mmc_delayed_work_wake_lock);
 	if (host->caps & MMC_CAP_NEEDS_POLL)
 		mmc_schedule_delayed_work(&host->detect, HZ);
 }
@@ -2885,9 +2875,6 @@ static int __init mmc_init(void)
 	if (!workqueue)
 		return -ENOMEM;
 
-	wake_lock_init(&mmc_delayed_work_wake_lock, WAKE_LOCK_SUSPEND,
-		       "mmc_delayed_work");
-
 	ret = mmc_register_bus();
 	if (ret)
 		goto destroy_workqueue;
@@ -2908,7 +2895,6 @@ static int __init mmc_init(void)
 	mmc_unregister_bus();
 destroy_workqueue:
 	destroy_workqueue(workqueue);
-	wake_lock_destroy(&mmc_delayed_work_wake_lock);
 
 	return ret;
 }
@@ -2919,7 +2905,6 @@ static void __exit mmc_exit(void)
 	mmc_unregister_host_class();
 	mmc_unregister_bus();
 	destroy_workqueue(workqueue);
-	wake_lock_destroy(&mmc_delayed_work_wake_lock);
 }
 
 subsys_initcall(mmc_init);

From 7187712e8d53b99c64b774011945d601345ddf36 Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Wed, 24 Feb 2016 12:45:14 -0800
Subject: [PATCH 003/797] Revert "mmc: Extend wakelock if bus is dead"

This reverts commit dde72f9e313fc52d467ef0aad41cecd2c9f9f212.
---
 drivers/mmc/core/core.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 96666984a103..3e54185bc985 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2605,12 +2605,6 @@ void mmc_rescan(struct work_struct *work)
 
 	host->detect_change = 0;
 
-	/* If the card was removed the bus will be marked
-	 * as dead - extend the wakelock so userspace
-	 * can respond */
-	if (host->bus_dead)
-		extend_wakelock = 1;
-
 	/*
 	 * Let mmc_bus_put() free the bus/bus_ops if we've found that
 	 * the card is no longer present.

From c3b67e21677e5967e04e84861f7b67b456d7a7c4 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 16 Dec 2015 12:30:02 +0900
Subject: [PATCH 004/797] net: diag: split inet_diag_dump_one_icsk into two

Currently, inet_diag_dump_one_icsk finds a socket and then dumps
its information to userspace. Split it into a part that finds the
socket and a part that dumps the information.

[cherry-pick of net-next b613f56ec9baf30edf5d9d607b822532a273dad7]

Change-Id: I144765afb6ff1cd66eb4757c9418112fb0b08a6f
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  5 +++++
 net/ipv4/inet_diag.c      | 42 +++++++++++++++++++++++++--------------
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0c1a3e..e7032f041982 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a66615d..cfabb8f8f0a0 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 				  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-			    struct sk_buff *in_skb,
-			    const struct nlmsghdr *nlh,
-			    const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req)
 {
-	struct net *net = sock_net(in_skb->sk);
-	struct sk_buff *rep;
 	struct sock *sk;
-	int err;
 
-	err = -EINVAL;
 	if (req->sdiag_family == AF_INET)
 		sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 				 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 				  req->id.idiag_if);
 #endif
 	else
-		goto out_nosk;
+		return ERR_PTR(-EINVAL);
 
-	err = -ENOENT;
 	if (!sk)
-		goto out_nosk;
+		return ERR_PTR(-ENOENT);
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
+	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+		sock_gen_put(sk);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+			    struct sk_buff *in_skb,
+			    const struct nlmsghdr *nlh,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = inet_diag_find_one_icsk(net, hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
 
 	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
@@ -409,7 +422,6 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 	if (sk)
 		sock_gen_put(sk);
 
-out_nosk:
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);

From 7ae15a607bb106627859da621bd7543d44a6b61d Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 16 Dec 2015 12:30:03 +0900
Subject: [PATCH 005/797] net: diag: Add the ability to destroy a socket.

This patch adds a SOCK_DESTROY operation (named SOCK_DESTROY_BACKPORT
in this backport), a destroy function pointer to sock_diag_handler,
and a diag_destroy function pointer to struct proto.  It does not
include any implementation code.

[backport of net-next 64be0aed59ad519d6f2160868734f7e278290ac1]

Change-Id: Ic5327ff14b39dd268083ee4c1dc2c934b2820df5
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h      |  2 ++
 include/net/sock.h             |  1 +
 include/uapi/linux/sock_diag.h |  1 +
 net/core/sock_diag.c           | 23 ++++++++++++++++++++---
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index fddebc617469..4018b48f2b3b 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -15,6 +15,7 @@ struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
 	int (*get_info)(struct sk_buff *skb, struct sock *sk);
+	int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk)
 }
 void sock_diag_broadcast_destroy(struct sock *sk);
 
+int sock_diag_destroy(struct sock *sk, int err);
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 14d3c0734007..2d663ee8494d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1067,6 +1067,7 @@ struct proto {
 	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
 	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
+	int			(*diag_destroy)(struct sock *sk, int err);
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 49230d36f9ce..84e66ed670be 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY_BACKPORT 21
 
 struct sock_diag_req {
 	__u8	sdiag_family;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d43f67..3963c3872c69 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	hndl = sock_diag_handlers[req->sdiag_family];
 	if (hndl == NULL)
 		err = -ENOENT;
-	else
+	else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
 		err = hndl->dump(skb, nlh);
+	else if (nlh->nlmsg_type == SOCK_DESTROY_BACKPORT && hndl->destroy)
+		err = hndl->destroy(skb, nlh);
+	else
+		err = -EOPNOTSUPP;
 	mutex_unlock(&sock_diag_table_mutex);
 
 	return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		return ret;
 	case SOCK_DIAG_BY_FAMILY:
-		return __sock_diag_rcv_msg(skb, nlh);
+	case SOCK_DESTROY_BACKPORT:
+		return __sock_diag_cmd(skb, nlh);
 	default:
 		return -EINVAL;
 	}
@@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
 	return 0;
 }
 
+int sock_diag_destroy(struct sock *sk, int err)
+{
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!sk->sk_prot->diag_destroy)
+		return -EOPNOTSUPP;
+
+	return sk->sk_prot->diag_destroy(sk, err);
+}
+EXPORT_SYMBOL_GPL(sock_diag_destroy);
+
 static int __net_init diag_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {

From 1046108bbba31dc9f080c176647fa59f50118622 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 16 Dec 2015 12:30:04 +0900
Subject: [PATCH 006/797] net: diag: Support SOCK_DESTROY for inet sockets.

This passes the SOCK_DESTROY operation to the underlying protocol
diag handler, or returns -EOPNOTSUPP if that handler does not
define a destroy operation.

Most of this patch is just renaming functions. This is not
strictly necessary, but it would be fairly counterintuitive to
have the code to destroy inet sockets be in a function whose name
starts with inet_diag_get.

[backport of net-next 6eb5d2e08f071c05ecbe135369c9ad418826cab2]

Change-Id: Idc13a7def20f492a5323ad2f8de105426293bd37
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  4 ++++
 net/ipv4/inet_diag.c      | 23 +++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index e7032f041982..7c27fa1030e8 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,10 @@ struct inet_diag_handler {
 	void		(*idiag_get_info)(struct sock *sk,
 					  struct inet_diag_msg *r,
 					  void *info);
+
+	int		(*destroy)(struct sk_buff *in_skb,
+				   const struct inet_diag_req_v2 *req);
+
 	__u16		idiag_type;
 	__u16		idiag_info_size;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cfabb8f8f0a0..27c5fd5747d8 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -426,7 +426,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       const struct inet_diag_req_v2 *req)
 {
@@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	handler = inet_diag_lock_handler(req->sdiag_protocol);
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
-	else
+	else if (cmd == SOCK_DIAG_BY_FAMILY)
 		err = handler->dump_one(in_skb, nlh, req);
+	else if (cmd == SOCK_DESTROY_BACKPORT && handler->destroy)
+		err = handler->destroy(in_skb, req);
+	else
+		err = -EOPNOTSUPP;
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 	req.idiag_states = rc->idiag_states;
 	req.id = rc->id;
 
-	return inet_diag_get_exact(in_skb, nlh, &req);
+	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
 	int hdrlen = sizeof(struct inet_diag_req_v2);
 	struct net *net = sock_net(skb->sk);
@@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
 
-	if (h->nlmsg_flags & NLM_F_DUMP) {
+	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+	    h->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(h, hdrlen)) {
 			struct nlattr *attr;
 
@@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 		}
 	}
 
-	return inet_diag_get_exact(skb, h, nlmsg_data(h));
+	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)

From 69f0e89f55b7667889a638d19e3a101ce7ef4f7f Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 16 Dec 2015 12:30:05 +0900
Subject: [PATCH 007/797] net: diag: Support destroying TCP sockets.

This implements SOCK_DESTROY for TCP sockets. It causes all
blocking calls on the socket to fail fast with ECONNABORTED and
causes a protocol close of the socket. It informs the other end
of the connection by sending a RST, i.e., initiating a TCP ABORT
as per RFC 793. ECONNABORTED was chosen for consistency with
FreeBSD.

[cherry-pick of net-next c1e64e298b8cad309091b95d8436a0255c84f54a]

Change-Id: I728a01ef03f2ccfb9016a3f3051ef00975980e49
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   |  2 ++
 net/ipv4/Kconfig    | 13 +++++++++++++
 net/ipv4/tcp.c      | 32 ++++++++++++++++++++++++++++++++
 net/ipv4/tcp_diag.c | 19 +++++++++++++++++++
 net/ipv4/tcp_ipv4.c |  1 +
 net/ipv6/tcp_ipv6.c |  1 +
 6 files changed, 68 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c585ab6d7c76..5f4d135a00cc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1171,6 +1171,8 @@ void tcp_set_state(struct sock *sk, int state);
 
 void tcp_done(struct sock *sk);
 
+int tcp_abort(struct sock *sk, int err);
+
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
 	rx_opt->dsack = 0;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa004cfb..c22920525e5d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -436,6 +436,19 @@ config INET_UDP_DIAG
 	  Support for UDP socket monitoring interface used by the ss tool.
 	  If unsure, say Y.
 
+config INET_DIAG_DESTROY
+	bool "INET: allow privileged process to administratively close sockets"
+	depends on INET_DIAG
+	default n
+	---help---
+	  Provides a SOCK_DESTROY operation that allows privileged processes
+	  (e.g., a connection manager or a network administration tool such as
+	  ss) to close sockets opened by other processes. Closing a socket in
+	  this way interrupts any blocking read/write/connect operations on
+	  the socket and causes future socket calls to behave as if the socket
+	  had been disconnected.
+	  If unsure, say N.
+
 menuconfig TCP_CONG_ADVANCED
 	bool "TCP: advanced congestion control"
 	---help---
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c2b1f02ea155..6033a270843a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3101,6 +3101,38 @@ void tcp_done(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_done);
 
+int tcp_abort(struct sock *sk, int err)
+{
+	if (!sk_fullsock(sk)) {
+		sock_gen_put(sk);
+		return -EOPNOTSUPP;
+	}
+
+	/* Don't race with userspace socket closes such as tcp_close. */
+	lock_sock(sk);
+
+	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_err = err;
+		/* This barrier is coupled with smp_rmb() in tcp_poll() */
+		smp_wmb();
+		sk->sk_error_report(sk);
+		if (tcp_need_reset(sk->sk_state))
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+		tcp_done(sk);
+	}
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_abort);
+
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index b31604086edd..4d610934fb39 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sock_diag.h>
 #include <linux/inet_diag.h>
 
 #include <linux/tcp.h>
@@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
 static const struct inet_diag_handler tcp_diag_handler = {
 	.dump		 = tcp_diag_dump,
 	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 	.idiag_info_size = sizeof(struct tcp_info),
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy	 = tcp_diag_destroy,
+#endif
 };
 
 static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8c7e63163e92..c478092172f2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2351,6 +2351,7 @@ struct proto tcp_prot = {
 	.destroy_cgroup		= tcp_destroy_cgroup,
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
+	.diag_destroy		= tcp_abort,
 };
 EXPORT_SYMBOL(tcp_prot);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 365af3c8145d..f85b4c44c00d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1893,6 +1893,7 @@ struct proto tcpv6_prot = {
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
 	.clear_sk		= tcp_v6_clear_sk,
+	.diag_destroy		= tcp_abort,
 };
 
 static const struct inet6_protocol tcpv6_protocol = {

From 2a5cf317a11d19c2363502e3ec311cd601009221 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 Dec 2015 16:14:11 -0800
Subject: [PATCH 008/797] tcp: diag: add support for request sockets to
 tcp_abort()

Adding support for SYN_RECV request sockets to tcp_abort()
is quite easy after our tcp listener rewrite.

Note that we also need to better handle listeners, or we might
leak not yet accepted children, because of a missing
inet_csk_listen_stop() call.

[cherry-pick of net-next 07f6f4a31e5a8dee67960fc07bb0b37c5f879d4d]

Change-Id: I8ec6b2e6ec24f330a69595abf1d5469ace79b3fd
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Tested-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6033a270843a..6d06fbf6654f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3104,6 +3104,15 @@ EXPORT_SYMBOL_GPL(tcp_done);
 int tcp_abort(struct sock *sk, int err)
 {
 	if (!sk_fullsock(sk)) {
+		if (sk->sk_state == TCP_NEW_SYN_RECV) {
+			struct request_sock *req = inet_reqsk(sk);
+
+			local_bh_disable();
+			inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
+							  req);
+			local_bh_enable();
+			return 0;
+		}
 		sock_gen_put(sk);
 		return -EOPNOTSUPP;
 	}

From 58281e63ffb4c31d120709a090086c2c151897f5 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 22 Dec 2015 00:03:44 +0900
Subject: [PATCH 009/797] net: tcp: deal with listen sockets properly in
 tcp_abort.

When closing a listen socket, tcp_abort currently calls
tcp_done without clearing the request queue. If the socket has a
child socket that is established but not yet accepted, the child
socket is then left without a parent, causing a leak.

Fix this by setting the socket state to TCP_CLOSE and calling
inet_csk_listen_stop with the socket lock held, like tcp_close
does.

Tested using net_test. With this patch, calling SOCK_DESTROY on a
listen socket that has an established but not yet accepted child
socket results in the parent and the child being closed, such
that they no longer appear in sock_diag dumps.

[cherry-pick of net-next 2010b93e9317cc12acd20c4aed385af7f9d1681e]

Change-Id: I0555a142f11d8b36362ffd7c8ef4a5ecae8987c9
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6d06fbf6654f..7c0465202cc5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3120,6 +3120,11 @@ int tcp_abort(struct sock *sk, int err)
 	/* Don't race with userspace socket closes such as tcp_close. */
 	lock_sock(sk);
 
+	if (sk->sk_state == TCP_LISTEN) {
+		tcp_set_state(sk, TCP_CLOSE);
+		inet_csk_listen_stop(sk);
+	}
+
 	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
 	local_bh_disable();
 	bh_lock_sock(sk);

From 8538ccf40b8c5c480ee232fa948164a0b4dc166b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 20 Jan 2016 16:25:01 -0800
Subject: [PATCH 010/797] net: diag: support v4mapped sockets in
 inet_diag_find_one_icsk()

Lorenzo reported that we could not properly find v4mapped sockets
in inet_diag_find_one_icsk(). This patch fixes the issue.

[cherry-pick of fc439d9489479411fbf9bbbec2c768df89e85503]

Change-Id: I13515e83fb76d4729f00047f9eb142c929390fb2
Reported-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/inet_diag.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 27c5fd5747d8..a403a676d452 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -361,13 +361,20 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
 				 req->id.idiag_dport, req->id.idiag_src[0],
 				 req->id.idiag_sport, req->id.idiag_if);
 #if IS_ENABLED(CONFIG_IPV6)
-	else if (req->sdiag_family == AF_INET6)
-		sk = inet6_lookup(net, hashinfo,
-				  (struct in6_addr *)req->id.idiag_dst,
-				  req->id.idiag_dport,
-				  (struct in6_addr *)req->id.idiag_src,
-				  req->id.idiag_sport,
-				  req->id.idiag_if);
+	else if (req->sdiag_family == AF_INET6) {
+		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+			sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+					 req->id.idiag_dport, req->id.idiag_src[3],
+					 req->id.idiag_sport, req->id.idiag_if);
+		else
+			sk = inet6_lookup(net, hashinfo,
+					  (struct in6_addr *)req->id.idiag_dst,
+					  req->id.idiag_dport,
+					  (struct in6_addr *)req->id.idiag_src,
+					  req->id.idiag_sport,
+					  req->id.idiag_if);
+	}
 #endif
 	else
 		return ERR_PTR(-EINVAL);

From 4f27b251f23643c115fce7e36f31268deed6e71b Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@google.com>
Date: Mon, 29 Feb 2016 17:38:34 -0800
Subject: [PATCH 011/797] ANDROID: net: fix 'const' warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See the following build log splats. The sock_i_uid() helper doesn't
quite treat the parameter as 'const' (it acquires a member lock), but
this cast is the same approach taken by other callers in this file, so I
don't feel too bad about the fix.

  CC      net/ipv4/inet_connection_sock.o
  CC      net/ipv6/inet6_connection_sock.o
net/ipv6/inet6_connection_sock.c: In function ‘inet6_csk_route_req’:
net/ipv6/inet6_connection_sock.c:89:2: warning: passing argument 1 of ‘sock_i_uid’ discards ‘const’ qualifier from pointer target type [enabled by default]
In file included from include/linux/tcp.h:22:0,
                 from include/linux/ipv6.h:73,
                 from net/ipv6/inet6_connection_sock.c:18:
include/net/sock.h:1689:8: note: expected ‘struct sock *’ but argument is of type ‘const struct sock *’
net/ipv4/inet_connection_sock.c: In function ‘inet_csk_route_req’:
net/ipv4/inet_connection_sock.c:423:7: warning: passing argument 1 of ‘sock_i_uid’ discards ‘const’ qualifier from pointer target type [enabled by default]
In file included from include/net/inet_sock.h:27:0,
                 from include/net/inet_connection_sock.h:23,
                 from net/ipv4/inet_connection_sock.c:19:
include/net/sock.h:1689:8: note: expected ‘struct sock *’ but argument is of type ‘const struct sock *’
net/ipv4/inet_connection_sock.c: In function ‘inet_csk_route_child_sock’:
net/ipv4/inet_connection_sock.c:460:7: warning: passing argument 1 of ‘sock_i_uid’ discards ‘const’ qualifier from pointer target type [enabled by default]
In file included from include/net/inet_sock.h:27:0,
                 from include/net/inet_connection_sock.h:23,
                 from net/ipv4/inet_connection_sock.c:19:
include/net/sock.h:1689:8: note: expected ‘struct sock *’ but argument is of type ‘const struct sock *’

Change-Id: I5c156fc1a81f90323717bffd93c31d205b85620c
Signed-off-by: Brian Norris <briannorris@google.com>
---
 net/ipv4/inet_connection_sock.c  | 4 ++--
 net/ipv6/inet6_connection_sock.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 759f90e1e499..030cd09dd2a2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -420,7 +420,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
 			   ireq->ir_loc_addr, ireq->ir_rmt_port,
-			   htons(ireq->ir_num), sock_i_uid(sk));
+			   htons(ireq->ir_num), sock_i_uid((struct sock *)sk));
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -457,7 +457,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
 			   ireq->ir_loc_addr, ireq->ir_rmt_port,
-			   htons(ireq->ir_num), sock_i_uid(sk));
+			   htons(ireq->ir_num), sock_i_uid((struct sock *)sk));
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 7b214652768e..897bb6eb5751 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -86,7 +86,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
 	fl6->flowi6_mark = ireq->ir_mark;
 	fl6->fl6_dport = ireq->ir_rmt_port;
 	fl6->fl6_sport = htons(ireq->ir_num);
-	fl6->flowi6_uid = sock_i_uid(sk);
+	fl6->flowi6_uid = sock_i_uid((struct sock *)sk);
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p);

From 2e9117dd33110315f302d4bf3afb11561c29af21 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@google.com>
Date: Mon, 29 Feb 2016 17:40:05 -0800
Subject: [PATCH 012/797] ANDROID: lowmemorykiller: fix declaration order
 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/staging/android/lowmemorykiller.c: In function ‘lowmem_scan’:
drivers/staging/android/lowmemorykiller.c:174:3: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]

Change-Id: I9de6cf2c374bc43131725a7ed666a033a4449ea9
Signed-off-by: Brian Norris <briannorris@google.com>
---
 drivers/staging/android/lowmemorykiller.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index 4cc3d02eacfb..af49af0cca01 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -161,6 +161,10 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
 			     p->comm, p->pid, oom_score_adj, tasksize);
 	}
 	if (selected) {
+		long cache_size = other_file * (long)(PAGE_SIZE / 1024);
+		long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
+		long free = other_free * (long)(PAGE_SIZE / 1024);
+
 		task_lock(selected);
 		send_sig(SIGKILL, selected, 0);
 		/*
@@ -171,9 +175,6 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
 		if (selected->mm)
 			mark_oom_victim(selected);
 		task_unlock(selected);
-		long cache_size = other_file * (long)(PAGE_SIZE / 1024);
-		long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
-		long free = other_free * (long)(PAGE_SIZE / 1024);
 		trace_lowmemory_kill(selected, cache_size, cache_limit, free);
 		lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
 			        "   to free %ldkB on behalf of '%s' (%d) because\n" \

From 4ebd433b3d2c1d26425429d6192da63965fbbc7e Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@google.com>
Date: Mon, 29 Feb 2016 17:44:51 -0800
Subject: [PATCH 013/797] ANDROID: usb: gadget: f_mtp: don't use le16 for u8
 field

The 'bCount' field is u8. Noticed by this warning:

drivers/usb/gadget/function/f_mtp.c:264:3: warning: large integer implicitly truncated to unsigned type [-Woverflow]

Change-Id: Ie82dfd1a8986ecd3acf143e41c46822f0d1aca4f
Signed-off-by: Brian Norris <briannorris@google.com>
---
 drivers/usb/gadget/function/f_mtp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
index aec7b8d61fe7..8f80a7e91314 100644
--- a/drivers/usb/gadget/function/f_mtp.c
+++ b/drivers/usb/gadget/function/f_mtp.c
@@ -261,7 +261,7 @@ struct {
 		.dwLength = __constant_cpu_to_le32(sizeof(mtp_ext_config_desc)),
 		.bcdVersion = __constant_cpu_to_le16(0x0100),
 		.wIndex = __constant_cpu_to_le16(4),
-		.bCount = __constant_cpu_to_le16(1),
+		.bCount = 1,
 	},
 	.function = {
 		.bFirstInterfaceNumber = 0,

From 49f63e15598e09f24e0528e7cad18d6dec2ba804 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@google.com>
Date: Mon, 29 Feb 2016 17:42:29 -0800
Subject: [PATCH 014/797] ANDROID: kernel/watchdog: fix unused variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kernel/watchdog.c:122:22: warning: ‘hardlockup_allcpu_dumped’ defined but not used [-Wunused-variable]

Change-Id: I99e97e7cc31b589cd674fd4495832c9ef036d0b9
Signed-off-by: Brian Norris <briannorris@google.com>
---
 kernel/watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 25955235ecdd..e864906af3fc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -119,7 +119,7 @@ static unsigned long soft_lockup_nmi_warn;
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 unsigned int __read_mostly hardlockup_panic =
 			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
-static unsigned long hardlockup_allcpu_dumped;
+static unsigned long __maybe_unused hardlockup_allcpu_dumped;
 /*
  * We may not want to enable hard lockup detection by default in all cases,
  * for example when running the kernel as a guest on a hypervisor. In these

From 13712cceef9b7f1a585ab83564e86b115dd41134 Mon Sep 17 00:00:00 2001
From: Dylan Reid <dgreid@chromium.org>
Date: Mon, 2 Mar 2015 17:09:07 -0800
Subject: [PATCH 015/797] ANDROID: mmc: Move tracepoint creation and export
 symbols

Move the tracepoint creation to core from card, as core shouldn't depend
on card.

Also add EXPORT_SYMBOL_GPL calls to enable module build.

Change-Id: Ie39fcdadc0516df99600d0963efe09b6cd7a9bf8
Signed-off-by: Dylan Reid <dgreid@chromium.org>
(cherry picked from commit da5fbd1e7e50fee3a8271f50d25c848d0ede64b3,
from android-3.14)
Signed-off-by: Brian Norris <briannorris@google.com>
---
 drivers/mmc/card/block.c | 1 -
 drivers/mmc/core/core.c  | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 21aa3244029b..90e9738a129a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -36,7 +36,6 @@
 #include <linux/compat.h>
 #include <linux/pm_runtime.h>
 
-#define CREATE_TRACE_POINTS
 #include <trace/events/mmc.h>
 
 #include <linux/mmc/ioctl.h>
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 3e54185bc985..9fab52559a8c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 
+#define CREATE_TRACE_POINTS
 #include <trace/events/mmc.h>
 
 #include <linux/mmc/card.h>
@@ -48,6 +49,11 @@
 #include "sd_ops.h"
 #include "sdio_ops.h"
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_erase_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_erase_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_rw_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_rw_end);
+
 /* If the device is not responding */
 #define MMC_CORE_TIMEOUT_MS	(10 * 60 * 1000) /* 10 minute timeout */
 

From 467971a1a728971f22d8642266bf6a8cb0ec6cb4 Mon Sep 17 00:00:00 2001
From: Andrew Bresticker <abrestic@chromium.org>
Date: Tue, 10 Nov 2015 14:11:46 -0800
Subject: [PATCH 016/797] ANDROID: mmc: sdio: Disable retuning in
 sdio_reset_comm()

Since sdio_reset_comm() re-initializes the SDIO card, disable retuning
before idling and shutting down the card.  Tuning will be re-enabled (if
necessary) in mmc_sdio_init_card().

BUG=chrome-os-partner:46444
TEST=With CL:311815, toggle WiFi on/off on Smaug and observe that the
WiFi card comes back up and is able to tune successfully.

Change-Id: Ib4a5cfd4d75fc9e3ed7bb3f1e2ffd30de16c5d28
Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/311797
Reviewed-by: Derek Basehore <dbasehore@chromium.org>

[briannorris: brought from Chromium kernel in 3.18 -> 4.4 rebase]

Signed-off-by: Brian Norris <briannorris@google.com>
---
 drivers/mmc/core/sdio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 09e100ba22b0..b47957122fd7 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -22,6 +22,7 @@
 
 #include "core.h"
 #include "bus.h"
+#include "host.h"
 #include "sd.h"
 #include "sdio_bus.h"
 #include "mmc_ops.h"
@@ -1228,6 +1229,8 @@ int sdio_reset_comm(struct mmc_card *card)
 	printk("%s():\n", __func__);
 	mmc_claim_host(host);
 
+	mmc_retune_disable(host);
+
 	mmc_go_idle(host);
 
 	mmc_set_clock(host, host->f_min);

From a9c7e0955a71e12167d763ef286c80acb5e5fb45 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Tue, 1 Mar 2016 09:44:17 -0800
Subject: [PATCH 017/797] net: pppolac/pppopns: Replace msg.msg_iov with
 iov_iter_kvec()

Commit 1af89c1ef3b6 ("Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1")
fixed the build for PPPoPNS and PPPoLAC by re-introducing a field in
struct msghdr which was removed upstream. Re-introducing the field doesn't
get it used, so it is quite likely that the code never worked. Fix it up for
good.

Fixes: 1af89c1ef3b6 ("Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1")
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/net/ppp/pppolac.c | 9 ++++-----
 drivers/net/ppp/pppopns.c | 9 ++++-----
 include/linux/socket.h    | 4 ----
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c
index 1b8180cc1d4d..0184c96579e9 100644
--- a/drivers/net/ppp/pppolac.c
+++ b/drivers/net/ppp/pppolac.c
@@ -206,11 +206,10 @@ static void pppolac_xmit_core(struct work_struct *delivery_work)
 	while ((skb = skb_dequeue(&delivery_queue))) {
 		struct sock *sk_udp = skb->sk;
 		struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len};
-		struct msghdr msg = {
-			.msg_iov = (struct iovec *)&iov,
-			.msg_iovlen = 1,
-			.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT,
-		};
+		struct msghdr msg = { 0 };
+
+		iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1,
+			      skb->len);
 		sk_udp->sk_prot->sendmsg(sk_udp, &msg, skb->len);
 		kfree_skb(skb);
 	}
diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c
index 568bb45cfeac..d9e06039794e 100644
--- a/drivers/net/ppp/pppopns.c
+++ b/drivers/net/ppp/pppopns.c
@@ -189,11 +189,10 @@ static void pppopns_xmit_core(struct work_struct *delivery_work)
 	while ((skb = skb_dequeue(&delivery_queue))) {
 		struct sock *sk_raw = skb->sk;
 		struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len};
-		struct msghdr msg = {
-			.msg_iov = (struct iovec *)&iov,
-			.msg_iovlen = 1,
-			.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT,
-		};
+		struct msghdr msg = { 0 };
+
+		iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1,
+			      skb->len);
 		sk_raw->sk_prot->sendmsg(sk_raw, &msg, skb->len);
 		kfree_skb(skb);
 	}
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 18a8337c8959..5bf59c8493b7 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -47,10 +47,6 @@ struct linger {
 struct msghdr {
 	void		*msg_name;	/* ptr to socket address structure */
 	int		msg_namelen;	/* size of socket address structure */
-#if defined(CONFIG_PPPOLAC) || defined(CONFIG_PPPOPNS)
-	struct iovec	*msg_iov;	/* scatter/gather array */
-	__kernel_size_t	msg_iovlen;	/* # elements in msg_iov */
-#endif
 	struct iov_iter	msg_iter;	/* data */
 	void		*msg_control;	/* ancillary data */
 	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */

From 9752f90bc23d8ee78ee9d8613b89436e4fc505af Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Tue, 1 Mar 2016 09:47:32 -0800
Subject: [PATCH 018/797] net: ppp: Fix modular build for PPPOLAC and PPPOPNS

Unlike other configurations in net/ppp, PPPOLAC and PPPOPNS
are defined as boolean configuration options. In allmodconfig builds
(or, specifically, if PPP and some of the other PPP protocols were
built as modules), this resulted in build errors such as the following,
since pppox was built both as module and into the kernel.

ERROR: "pppox_ioctl" [net/l2tp/l2tp_ppp.ko] undefined!
ERROR: "unregister_pppox_proto" [net/l2tp/l2tp_ppp.ko] undefined!
ERROR: "register_pppox_proto" [net/l2tp/l2tp_ppp.ko] undefined!
ERROR: "pppox_unbind_sock" [net/l2tp/l2tp_ppp.ko] undefined!

Fix the problem by defining PPPOLAC and PPPOPNS tristate.

Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/net/ppp/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig
index e4cf44b1e815..282aec4860eb 100644
--- a/drivers/net/ppp/Kconfig
+++ b/drivers/net/ppp/Kconfig
@@ -150,7 +150,7 @@ config PPPOL2TP
 if TTY
 
 config PPPOLAC
-	bool "PPP on L2TP Access Concentrator"
+	tristate "PPP on L2TP Access Concentrator"
 	depends on PPP && INET
 	help
 	  L2TP (RFC 2661) is a tunneling protocol widely used in virtual private
@@ -159,7 +159,7 @@ config PPPOLAC
 	  fairly simple and suited for clients.
 
 config PPPOPNS
-	bool "PPP on PPTP Network Server"
+	tristate "PPP on PPTP Network Server"
 	depends on PPP && INET
 	help
 	  PPTP (RFC 2637) is a tunneling protocol widely used in virtual private

From 74260562e2fed255ca80f31f74140f9d493ce290 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Tue, 1 Mar 2016 09:52:27 -0800
Subject: [PATCH 019/797] video: adf: Fix modular build

Builds with ADF configured as a module fail with the following errors.

ERROR: "adf_fops" [drivers/video/adf/adf_sysfs.ko] undefined!
ERROR: "adf_obj_sysfs_find" [drivers/video/adf/adf_fops.ko] undefined!
ERROR: "adf_buffer_cleanup" [drivers/video/adf/adf_fops.ko] undefined!
ERROR: "adf_attachment_validate" [drivers/video/adf/adf_client.ko] undefined!
ERROR: "adf_attachment_find" [drivers/video/adf/adf_client.ko] undefined!
ERROR: "adf_buffer_mapping_cleanup" [drivers/video/adf/adf_client.ko] undefined!
ERROR: "adf_attachment_free" [drivers/video/adf/adf_client.ko] undefined!
ERROR: "adf_obj_find_event_refcount" [drivers/video/adf/adf_client.ko] undefined!
ERROR: "adf_file_queue_event" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_interface_sysfs_init" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_interface_sysfs_destroy" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_device_sysfs_init" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_device_sysfs_destroy" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_sysfs_destroy" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_overlay_engine_sysfs_init" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_overlay_engine_sysfs_destroy" [drivers/video/adf/adf.ko] undefined!
ERROR: "adf_sysfs_init" [drivers/video/adf/adf.ko] undefined!

If ADF is configured as module, each of the object files ends up being
a separate module. Since the functions are used across the various files
but not exported, this results in the observed build errors.
Modify the Makefile to create a single module instead.

Fixes: 066a50cee536 ("video: add atomic display framework")
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/video/adf/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/video/adf/Makefile b/drivers/video/adf/Makefile
index 78d0915122f4..cdf34a666dc7 100644
--- a/drivers/video/adf/Makefile
+++ b/drivers/video/adf/Makefile
@@ -2,13 +2,15 @@ ccflags-y := -Idrivers/staging/android
 
 CFLAGS_adf.o := -I$(src)
 
-obj-$(CONFIG_ADF) += adf.o \
+obj-$(CONFIG_ADF) += adf_core.o
+
+adf_core-y := adf.o \
 	adf_client.o \
 	adf_fops.o \
 	adf_format.o \
 	adf_sysfs.o
 
-obj-$(CONFIG_COMPAT) += adf_fops32.o
+adf_core-$(CONFIG_COMPAT) += adf_fops32.o
 
 obj-$(CONFIG_ADF_FBDEV) += adf_fbdev.o
 

From c37e85aae233519567c82a34a6201cf95f1280a6 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Thu, 3 Mar 2016 09:30:33 -0800
Subject: [PATCH 020/797] video: adf: Set ADF_MEMBLOCK to boolean

Attempts to build with CONFIG_ADF_MEMBLOCK=m result in the following
build error.

ERROR: "memblock_free" [drivers/video/adf/adf_memblock.ko] undefined!

memblock_free() is marked as __init_memblock, so exporting it seems to be
a bad idea. All other callers are only configurable into the kernel,
so do the same with ADF_MEMBLOCK.

Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/video/adf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/adf/Kconfig b/drivers/video/adf/Kconfig
index 33858b73d8bb..2777db48fae0 100644
--- a/drivers/video/adf/Kconfig
+++ b/drivers/video/adf/Kconfig
@@ -11,4 +11,4 @@ menuconfig ADF_FBDEV
 menuconfig ADF_MEMBLOCK
 	depends on ADF
 	depends on HAVE_MEMBLOCK
-	tristate "Helper for using memblocks as buffers in ADF drivers"
+	bool "Helper for using memblocks as buffers in ADF drivers"

From 267687882304f817e75e7f9a439ecb5b8f579a34 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Thu, 3 Mar 2016 09:44:44 -0800
Subject: [PATCH 021/797] drivers: power: use 'current' instead of
 'get_current()'

get_current() to get the current thread pointer is not defined for all
architectures. This results in the following build error for several
architectures (s390, powerpc, and possibly others).

drivers/base/power/main.c: In function '__device_suspend':
drivers/base/power/main.c:1415:2: error:
	implicit declaration of function 'get_current'

Use 'current' instead. Also include asm/current.h instead of depending on
an implicit include.

Fixes: ad86cc8ad632 ("drivers: power: Add watchdog timer to catch drivers which lockup during suspend.")
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/base/power/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index a54d810f2966..6ed8b9326629 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -35,6 +35,8 @@
 #include <linux/timer.h>
 #include <linux/wakeup_reason.h>
 
+#include <asm/current.h>
+
 #include "../base.h"
 #include "power.h"
 
@@ -1412,7 +1414,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 		goto Complete;
 	
 	data.dev = dev;
-	data.tsk = get_current();
+	data.tsk = current;
 	init_timer_on_stack(&timer);
 	timer.expires = jiffies + HZ * 12;
 	timer.function = dpm_drv_timeout;

From 860df91e2aa0da046c727ea87ad7f1176a20665d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Thu, 3 Mar 2016 10:33:40 -0800
Subject: [PATCH 022/797] PM / suspend: Add dependency on RTC_LIB

Commit 1eff8f99f9f9 ("PM / Suspend: Print wall time at suspend entry and
exit") calls rtc_time_to_tm(), which in turn calls rtc_time64_to_tm().
Since RTC_LIB is not mandatory for all architectures, this can result in
the following build error.

suspend.c:(.text+0x2f36c): undefined reference to `rtc_time64_to_tm'

rtc_time64_to_tm() is implemented in rtc-lib, so SUSPEND now needs to
select RTC_LIB.

Fixes: 1eff8f99f9f9 ("PM / Suspend: Print wall time at suspend entry and exit")
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 kernel/power/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 84c480946fb2..6d6f63be1f9b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -1,6 +1,7 @@
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on ARCH_SUSPEND_POSSIBLE
+	select RTC_LIB
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is

From 486057e23399589d5d2b904bfbfcd48ea9c72066 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Fri, 4 Mar 2016 07:22:27 -0800
Subject: [PATCH 023/797] power: Provide dummy log_suspend_abort_reason() if
 SUSPEND is disabled

The API to log the suspend reason was introduced with commit 57caa2ad5ce3
("power: Adds functionality to log the last suspend abort reason.").
It is called from functions enabled with PM_SLEEP and from functions
enabled with SUSPEND, but only available if SUSPEND is enabled.
This can result in build failures such as the following if PM_SLEEP
is enabled, but SUSPEND is not.

kernel/built-in.o: In function `try_to_freeze_tasks':
process.c:(.text+0x30928): undefined reference to `log_suspend_abort_reason'
drivers/built-in.o: In function `syscore_suspend':
(.text+0x6e250): undefined reference to `log_suspend_abort_reason'
drivers/built-in.o: In function `__device_suspend':
main.c:(.text+0x7a528): undefined reference to `log_suspend_abort_reason'

Fixes: 57caa2ad5ce3 ("power: Adds functionality to log the last suspend abort reason.")
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 include/linux/wakeup_reason.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
index ad8b76936c7f..d84d8c301546 100644
--- a/include/linux/wakeup_reason.h
+++ b/include/linux/wakeup_reason.h
@@ -21,7 +21,12 @@
 #define MAX_SUSPEND_ABORT_LEN 256
 
 void log_wakeup_reason(int irq);
-void log_suspend_abort_reason(const char *fmt, ...);
 int check_wakeup_reason(int irq);
 
+#ifdef CONFIG_SUSPEND
+void log_suspend_abort_reason(const char *fmt, ...);
+#else
+static inline void log_suspend_abort_reason(const char *fmt, ...) { }
+#endif
+
 #endif /* _LINUX_WAKEUP_REASON_H */

From 47ccdf2dba62f9931047bd4bc3bfbddfcbef1b7e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 3 Nov 2015 17:01:46 -0500
Subject: [PATCH 024/797] hid-sensor-hub.c: fix wrong do_div() usage

do_div() must only be used with a u64 dividend.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
(cherry picked from commit 8d43b49e7e0070f96ac46d30659a336c0224fa0b)
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/hid/hid-sensor-hub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 92870cdb52d9..8efaa88329aa 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -218,7 +218,8 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 		goto done_proc;
 	}
 
-	remaining_bytes = do_div(buffer_size, sizeof(__s32));
+	remaining_bytes = buffer_size % sizeof(__s32);
+	buffer_size = buffer_size / sizeof(__s32);
 	if (buffer_size) {
 		for (i = 0; i < buffer_size; ++i) {
 			hid_set_field(report->field[field_index], i,

From 11b55d507888b8f81679aa09636d339d42b09ac4 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Fri, 4 Mar 2016 12:44:33 -0800
Subject: [PATCH 025/797] misc: uid_stat: Include linux/atomic.h instead of
 asm/atomic.h

Building the uid_stat driver on sparc32 fails with the following errors.

include/linux/atomic.h: In function 'atomic_add_unless':
include/linux/atomic.h:437:2: error:
	implicit declaration of function '__atomic_add_unless'
include/linux/atomic.h: In function 'atomic_andnot':
include/linux/atomic.h:454:2: error:
	implicit declaration of function 'atomic_and'
include/linux/atomic.h: In function 'atomic_set_mask':
include/linux/atomic.h:465:2: error:
	implicit declaration of function 'atomic_or'
include/linux/atomic.h: In function 'atomic_inc_not_zero_hint':
include/linux/atomic.h:490:3: error:
	implicit declaration of function 'atomic_cmpxchg'
include/linux/atomic.h: In function 'atomic_dec_if_positive':
include/linux/atomic.h:537:2: error:
	implicit declaration of function 'atomic_read'

Fixes: 6b6d5fbf9ae5 ("misc: uidstat: Adding uid stat driver to collect network statistics.")
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 drivers/misc/uid_stat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/uid_stat.c b/drivers/misc/uid_stat.c
index 8b8c9a22360b..185c69c9738a 100644
--- a/drivers/misc/uid_stat.c
+++ b/drivers/misc/uid_stat.c
@@ -13,7 +13,7 @@
  *
  */
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 
 #include <linux/err.h>
 #include <linux/init.h>

From e9eb108900c60b1696426cb6d155cea9905b403f Mon Sep 17 00:00:00 2001
From: Tim Murray <timmurray@google.com>
Date: Tue, 19 Jan 2016 16:33:27 -0800
Subject: [PATCH 026/797] ANDROID: dm-crypt: run in a WQ_HIGHPRI workqueue

(cherry picked from commit ad3ac5180979e5dd1f84e4a807f76fb9fb19f814)

Running dm-crypt in a standard workqueue results in IO competing for CPU
time with standard user apps, which can lead to pipeline bubbles and
seriously degraded performance. Move to a WQ_HIGHPRI workqueue to
protect against that.

Signed-off-by: Tim Murray <timmurray@google.com>
Bug: 25392275
Change-Id: I2828587c754a7c2cafdd78b3323b9896cb8cd4e7
---
 drivers/md/dm-crypt.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3147c8d09ea8..e85bcae50f65 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1864,16 +1864,24 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ret = -ENOMEM;
-	cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1);
+	cc->io_queue = alloc_workqueue("kcryptd_io",
+				       WQ_HIGHPRI |
+				       WQ_MEM_RECLAIM,
+				       1);
 	if (!cc->io_queue) {
 		ti->error = "Couldn't create kcryptd io queue";
 		goto bad;
 	}
 
 	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
-		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+		cc->crypt_queue = alloc_workqueue("kcryptd",
+						  WQ_HIGHPRI |
+						  WQ_MEM_RECLAIM, 1);
 	else
-		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+		cc->crypt_queue = alloc_workqueue("kcryptd",
+						  WQ_HIGHPRI |
+						  WQ_MEM_RECLAIM |
+						  WQ_UNBOUND,
 						  num_online_cpus());
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";

From 3e04d6dfbce9bd89f0c5c79123670397d8ca9370 Mon Sep 17 00:00:00 2001
From: Matthew Moeller <moeller.matt@gmail.com>
Date: Wed, 9 Mar 2016 20:19:25 -0600
Subject: [PATCH 027/797] usb: u_ether: Add missing rx_work init

commit 398a708ed5f3ef771d96dfb9b95b5d5170d17eb7
usb: u_ether: Add workqueue as bottom half handler for rx data path

set up a worker for the rx data path but missed a case where the
work_struct needed to be initialized.

This patch adds the missing 'INIT_WORK'

Change-Id: I2daabd39d35b3e17a3054837282d649d9c78a0aa
Signed-off-by: Matthew Moeller <moeller.matt@gmail.com>
---
 drivers/usb/gadget/function/u_ether.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 76b25445c6ad..dd73dfe5dcab 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -1014,6 +1014,7 @@ struct net_device *gether_setup_name_default(const char *netname)
 	spin_lock_init(&dev->lock);
 	spin_lock_init(&dev->req_lock);
 	INIT_WORK(&dev->work, eth_work);
+	INIT_WORK(&dev->rx_work, process_rx_w);
 	INIT_LIST_HEAD(&dev->tx_reqs);
 	INIT_LIST_HEAD(&dev->rx_reqs);
 

From 772d83a92971662b11316a2a266730d18c1bdeda Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@google.com>
Date: Mon, 14 Mar 2016 13:34:44 -0700
Subject: [PATCH 028/797] FROMLIST: pstore-ram: fix NULL reference when used
 with pdata

When using platform-data (not DT), we get an OOPS, because drvdata is
only initialized after we try to use it.

This addresses my comments made on the upstream submission here:

https://patchwork.kernel.org/patch/7980651/

Fixes boot on Chrome OS systems, including the Pixel 2.

Change-Id: I97360edf2ce61c83dc543cb6c169f3287e2dae4b
Fixes: b1d1b7187c11 ("FROMLIST: pstore-ram: add Device Tree bindings")
Signed-off-by: Brian Norris <briannorris@google.com>
---
 fs/pstore/ram.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 2429c804cf78..414041342a99 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -554,7 +554,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
 static int ramoops_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ramoops_platform_data *pdata = platform_get_drvdata(pdev);
+	struct ramoops_platform_data *pdata = pdev->dev.platform_data;
 	struct ramoops_context *cxt = &oops_cxt;
 	size_t dump_mem_sz;
 	phys_addr_t paddr;
@@ -666,7 +666,6 @@ static int ramoops_probe(struct platform_device *pdev)
 		cxt->size, (unsigned long long)cxt->phys_addr,
 		cxt->ecc_info.ecc_size, cxt->ecc_info.block_size);
 
-	platform_set_drvdata(pdev, pdata);
 	return 0;
 
 fail_buf:

From 06bfe14bdda83b9544c6b7bd4cdf738013e51ae8 Mon Sep 17 00:00:00 2001
From: dcashman <dcashman@google.com>
Date: Wed, 24 Feb 2016 13:27:06 -0800
Subject: [PATCH 029/797] FROMLIST: drivers: char: random: add
 get_random_long()

(cherry picked from commit https://lkml.org/lkml/2016/2/4/831)

d07e22597d1d355 ("mm: mmap: add new /proc tunable for mmap_base ASLR")
added the ability to choose from a range of values to use for entropy
count in generating the random offset to the mmap_base address.  The
maximum value on this range was set to 32 bits for 64-bit x86 systems, but
this value could be increased further, requiring more than the 32 bits of
randomness provided by get_random_int(), as is already possible for arm64.
Add a new function: get_random_long() which more naturally fits with the
mmap usage of get_random_int() but operates exactly the same as
get_random_int().

Also, fix the shifting constant in mmap_rnd() to be an unsigned long so
that values greater than 31 bits generate an appropriate mask without
overflow.  This is especially important on x86, as its shift instruction
uses a 5-bit mask for the shift operand, which meant that any value for
mmap_rnd_bits over 31 acts as a no-op and effectively disables mmap_base
randomization.

Finally, replace calls to get_random_int() with get_random_long() where
appropriate.

Bug: 26963541
Signed-off-by: Daniel Cashman <dcashman@android.com>
Signed-off-by: Daniel Cashman <dcashman@google.com>
Change-Id: I5b45621088666d5d1dfbf43952f25ea0798b10ba
---
 drivers/char/random.c  | 22 ++++++++++++++++++++++
 include/linux/random.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d0da5d852d41..b583e5336630 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1818,6 +1818,28 @@ unsigned int get_random_int(void)
 }
 EXPORT_SYMBOL(get_random_int);
 
+/*
+ * Same as get_random_int(), but returns unsigned long.
+ */
+unsigned long get_random_long(void)
+{
+	__u32 *hash;
+	unsigned long ret;
+
+	if (arch_get_random_long(&ret))
+		return ret;
+
+	hash = get_cpu_var(get_random_int_hash);
+
+	hash[0] += current->pid + jiffies + random_get_entropy();
+	md5_transform(hash, random_int_secret);
+	ret = *(unsigned long *)hash;
+	put_cpu_var(get_random_int_hash);
+
+	return ret;
+}
+EXPORT_SYMBOL(get_random_long);
+
 /*
  * randomize_range() returns a start address such that
  *
diff --git a/include/linux/random.h b/include/linux/random.h
index a75840c1aa71..9c29122037f9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops;
 #endif
 
 unsigned int get_random_int(void);
+unsigned long get_random_long(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);

From 818ad76fe6fb93c8dcbfdd4e479026fdfe14df07 Mon Sep 17 00:00:00 2001
From: dcashman <dcashman@google.com>
Date: Wed, 24 Feb 2016 13:31:22 -0800
Subject: [PATCH 030/797] FROMLIST: mm: ASLR: use get_random_long()

(cherry picked from commit https://lkml.org/lkml/2016/2/4/833)

Replace calls to get_random_int() followed by a cast to (unsigned long)
with calls to get_random_long().  Also address a shifting bug which, in the
case of x86, removed the entropy mask for mmap_rnd_bits values > 31 bits.

Bug: 26963541
Signed-off-by: Daniel Cashman <dcashman@android.com>
Signed-off-by: Daniel Cashman <dcashman@google.com>
Change-Id: I36c156c9b8d7d157134895fddd4cd6efddcbee86
---
 arch/arm/mm/mmap.c               | 2 +-
 arch/arm64/mm/mmap.c             | 4 ++--
 arch/mips/mm/mmap.c              | 4 ++--
 arch/powerpc/kernel/process.c    | 4 ++--
 arch/powerpc/mm/mmap.c           | 4 ++--
 arch/sparc/kernel/sys_sparc_64.c | 2 +-
 arch/x86/mm/mmap.c               | 6 +++---
 fs/binfmt_elf.c                  | 2 +-
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 4b4058db0781..66353caa35b9 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void)
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+	rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 4c893b5189dd..232f787a088a 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void)
 
 #ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT))
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 	else
 #endif
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 	return rnd << PAGE_SHIFT;
 }
 
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 5c81fdd032c3..353037699512 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void)
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int();
+	rnd = get_random_long();
 	rnd <<= PAGE_SHIFT;
 	if (TASK_IS_32BIT_ADDR)
 		rnd &= 0xfffffful;
@@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 
 static inline unsigned long brk_rnd(void)
 {
-	unsigned long rnd = get_random_int();
+	unsigned long rnd = get_random_long();
 
 	rnd = rnd << PAGE_SHIFT;
 	/* 8MB for 32bit, 256MB for 64bit */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef2ad2d682da..36795d1e7558 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1659,9 +1659,9 @@ static inline unsigned long brk_rnd(void)
 
 	/* 8MB for 32bit, 1GB for 64bit */
 	if (is_32bit_task())
-		rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+		rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
 	else
-		rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+		rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e12f6c..4087705ba90f 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)
 
 	/* 8MB for 32bit, 1GB for 64bit */
 	if (is_32bit_task())
-		rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+		rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
 	else
-		rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+		rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c690c8e16a96..b489e9759518 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void)
 	unsigned long rnd = 0UL;
 
 	if (current->flags & PF_RANDOMIZE) {
-		unsigned long val = get_random_int();
+		unsigned long val = get_random_long();
 		if (test_thread_flag(TIF_32BIT))
 			rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
 		else
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 96bd1e2bffaf..72bb52f93c3d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void)
 
 	if (mmap_is_ia32())
 #ifdef CONFIG_COMPAT
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 #else
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 #endif
 	else
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a93755e880f..0c52941dd62c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -651,7 +651,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
 
 	if ((current->flags & PF_RANDOMIZE) &&
 		!(current->personality & ADDR_NO_RANDOMIZE)) {
-		random_variable = (unsigned long) get_random_int();
+		random_variable = get_random_long();
 		random_variable &= STACK_RND_MASK;
 		random_variable <<= PAGE_SHIFT;
 	}

From 2b081fe250a6aef7375a6d6f9783547b11c171c6 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 16 Mar 2016 18:15:47 +0800
Subject: [PATCH 031/797] FROMLIST: mmc: block: fix ABI regression of
 mmc_blk_ioctl

If mmc_blk_ioctl returns -EINVAL, blkdev_ioctl continues to
work without returning the error to user-space. But now we check
CAP_SYS_RAWIO first, so we return -EPERM to blkdev_ioctl,
which makes blkdev_ioctl return -EPERM to user-space directly.
So this will break all the ioctls with BLKROSET. We found that
Android adb suffers from it, as shown in the following log:

remount of /system failed;
couldn't make block device writable: Operation not permitted
openat(AT_FDCWD, "/dev/block/platform/ff420000.dwmmc/by-name/system", O_RDONLY) = 3
ioctl(3, BLKROSET, 0)  = -1 EPERM (Operation not permitted)

Fixes: a5f5774c55a2 ("mmc: block: Add new ioctl to send multi commands")
Change-Id: Ie9ba728e366abf4ab73fd6102d2a2aa0d4ee5c66
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/mmc/card/block.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 90e9738a129a..f2ce13ab7ae6 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -593,6 +593,14 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev,
 	struct mmc_card *card;
 	int err = 0, ioc_err = 0;
 
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
 	idata = mmc_blk_ioctl_copy_from_user(ic_ptr);
 	if (IS_ERR(idata))
 		return PTR_ERR(idata);
@@ -635,6 +643,14 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 	int i, err = 0, ioc_err = 0;
 	__u64 num_of_cmds;
 
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
 	if (copy_from_user(&num_of_cmds, &user->num_of_cmds,
 			   sizeof(num_of_cmds)))
 		return -EFAULT;
@@ -690,14 +706,6 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned int cmd, unsigned long arg)
 {
-	/*
-	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
-	 * whole block device, not on a partition.  This prevents overspray
-	 * between sibling partitions.
-	 */
-	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
-		return -EPERM;
-
 	switch (cmd) {
 	case MMC_IOC_CMD:
 		return mmc_blk_ioctl_cmd(bdev,

From a898ec7786b54a2b18f072df16e142656617cefe Mon Sep 17 00:00:00 2001
From: Mark Kuo <mkuo@nvidia.com>
Date: Thu, 20 Aug 2015 13:01:46 +0800
Subject: [PATCH 032/797] CHROMIUM: usb: gadget: f_mtp: Add SuperSpeed support

Add SuperSpeed endpoint and companion descriptors.

BUG=chrome-os-partner:43682
TEST=Smaug enumerates as a SuperSpeed device.

Change-Id: I2bf3125d180fcb07222a5740fa67f3526cf3e95c
Signed-off-by: Hui Fu <hfu@nvidia.com>
Signed-off-by: Henry Lin <henryl@nvidia.com>
Signed-off-by: Mark Kuo <mkuo@nvidia.com>
Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/294950
---
 drivers/usb/gadget/function/f_mtp.c | 76 ++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
index 8f80a7e91314..148f8fcecc80 100644
--- a/drivers/usb/gadget/function/f_mtp.c
+++ b/drivers/usb/gadget/function/f_mtp.c
@@ -135,6 +135,34 @@ static struct usb_interface_descriptor ptp_interface_desc = {
 	.bInterfaceProtocol     = 1,
 };
 
+static struct usb_endpoint_descriptor mtp_ss_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor mtp_ss_in_comp_desc = {
+	.bLength                = sizeof(mtp_ss_in_comp_desc),
+	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
+	/* .bMaxBurst           = DYNAMIC, */
+};
+
+static struct usb_endpoint_descriptor mtp_ss_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor mtp_ss_out_comp_desc = {
+	.bLength                = sizeof(mtp_ss_out_comp_desc),
+	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
+	/* .bMaxBurst           = DYNAMIC, */
+};
+
 static struct usb_endpoint_descriptor mtp_highspeed_in_desc = {
 	.bLength                = USB_DT_ENDPOINT_SIZE,
 	.bDescriptorType        = USB_DT_ENDPOINT,
@@ -174,6 +202,12 @@ static struct usb_endpoint_descriptor mtp_intr_desc = {
 	.bInterval              = 6,
 };
 
+static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = {
+	.bLength                = sizeof(mtp_intr_ss_comp_desc),
+	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
+	.wBytesPerInterval      = cpu_to_le16(2),
+};
+
 static struct usb_descriptor_header *fs_mtp_descs[] = {
 	(struct usb_descriptor_header *) &mtp_interface_desc,
 	(struct usb_descriptor_header *) &mtp_fullspeed_in_desc,
@@ -190,6 +224,17 @@ static struct usb_descriptor_header *hs_mtp_descs[] = {
 	NULL,
 };
 
+static struct usb_descriptor_header *ss_mtp_descs[] = {
+	(struct usb_descriptor_header *) &mtp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_comp_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_comp_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	(struct usb_descriptor_header *) &mtp_intr_ss_comp_desc,
+	NULL,
+};
+
 static struct usb_descriptor_header *fs_ptp_descs[] = {
 	(struct usb_descriptor_header *) &ptp_interface_desc,
 	(struct usb_descriptor_header *) &mtp_fullspeed_in_desc,
@@ -206,6 +251,17 @@ static struct usb_descriptor_header *hs_ptp_descs[] = {
 	NULL,
 };
 
+static struct usb_descriptor_header *ss_ptp_descs[] = {
+	(struct usb_descriptor_header *) &ptp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_comp_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_comp_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	(struct usb_descriptor_header *) &mtp_intr_ss_comp_desc,
+	NULL,
+};
+
 static struct usb_string mtp_string_defs[] = {
 	/* Naming interface "MTP" so libmtp will recognize us */
 	[INTERFACE_STRING_INDEX].s	= "MTP",
@@ -1131,10 +1187,24 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f)
 		mtp_highspeed_out_desc.bEndpointAddress =
 			mtp_fullspeed_out_desc.bEndpointAddress;
 	}
+	/* support super speed hardware */
+	if (gadget_is_superspeed(c->cdev->gadget)) {
+		unsigned max_burst;
+
+		/* Calculate bMaxBurst, we know packet size is 1024 */
+		max_burst = min_t(unsigned, MTP_BULK_BUFFER_SIZE / 1024, 15);
+		mtp_ss_in_desc.bEndpointAddress =
+			mtp_fullspeed_in_desc.bEndpointAddress;
+		mtp_ss_in_comp_desc.bMaxBurst = max_burst;
+		mtp_ss_out_desc.bEndpointAddress =
+			mtp_fullspeed_out_desc.bEndpointAddress;
+		mtp_ss_out_comp_desc.bMaxBurst = max_burst;
+	}
 
 	DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
-			gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
-			f->name, dev->ep_in->name, dev->ep_out->name);
+		gadget_is_superspeed(c->cdev->gadget) ? "super" :
+		(gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full"),
+		f->name, dev->ep_in->name, dev->ep_out->name);
 	return 0;
 }
 
@@ -1410,9 +1480,11 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi,
 	if (mtp_config) {
 		dev->function.fs_descriptors = fs_mtp_descs;
 		dev->function.hs_descriptors = hs_mtp_descs;
+		dev->function.ss_descriptors = ss_mtp_descs;
 	} else {
 		dev->function.fs_descriptors = fs_ptp_descs;
 		dev->function.hs_descriptors = hs_ptp_descs;
+		dev->function.ss_descriptors = ss_ptp_descs;
 	}
 	dev->function.bind = mtp_function_bind;
 	dev->function.unbind = mtp_function_unbind;

From 714ef4a05ab0239c7ad27e96780e7fe6f05ec5d2 Mon Sep 17 00:00:00 2001
From: Mark Kuo <mkuo@nvidia.com>
Date: Fri, 11 Sep 2015 16:12:59 +0800
Subject: [PATCH 033/797] CHROMIUM: usb: gadget: f_mtp: fix
 usb_ss_ep_comp_descriptor

wBytesPerInterval in the SuperSpeed Endpoint Companion Descriptor needs
to be set large enough to reserve enough bus time for the associated
periodic endpoint.

Originally, wBytesPerInterval for mtp's interrupt IN endpoint was set
to 2, so a single interrupt transfer was split into many 2-byte
interrupt transfers. So, we change wBytesPerInterval to INTR_BUFFER_SIZE
to ensure that an interrupt transfer will not be split.

BUG=none
TEST=Smaug works as a MTP device

Change-Id: I49c0df892b2d9e0193a684eef23f73664ced9f91
Signed-off-by: Henry Lin <henryl@nvidia.com>
Signed-off-by: Mark Kuo <mkuo@nvidia.com>
Reviewed-on: https://chromium-review.googlesource.com/299091
Reviewed-by: Andrew Bresticker <abrestic@chromium.org>
---
 drivers/usb/gadget/function/f_mtp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
index 148f8fcecc80..7f5c390885fe 100644
--- a/drivers/usb/gadget/function/f_mtp.c
+++ b/drivers/usb/gadget/function/f_mtp.c
@@ -205,7 +205,7 @@ static struct usb_endpoint_descriptor mtp_intr_desc = {
 static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = {
 	.bLength                = sizeof(mtp_intr_ss_comp_desc),
 	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
-	.wBytesPerInterval      = cpu_to_le16(2),
+	.wBytesPerInterval      = cpu_to_le16(INTR_BUFFER_SIZE),
 };
 
 static struct usb_descriptor_header *fs_mtp_descs[] = {

From 324e42bec18db251c70c73f6ac1b3e09a9535042 Mon Sep 17 00:00:00 2001
From: Mark Kuo <mkuo@nvidia.com>
Date: Mon, 11 Jan 2016 17:49:16 +0800
Subject: [PATCH 034/797] CHROMIUM: usb: gadget: audio_source: add .free_func
 callback

When userspace unbinds gadget functions through configfs, the
.free_func() callback is always invoked (in config_usb_cfg_unlink()).
Implement it as a no-op to avoid the following crash:

[ 68.125679] configfs-gadget gadget: unbind function 'accessory'/ffffffc0720bf000
[ 68.133202] configfs-gadget gadget: unbind function 'audio_source'/ffffffc0012ca3c0
[ 68.142668] tegra-xudc 700d0000.usb-device: ep 0 disabled
[ 68.148186] Bad mode in Synchronous Abort handler detected, code 0x86000006
[ 68.155144] CPU: 2 PID: 1 Comm: init Tainted: G    U W 3.18.0-09419-g87296c3-dirty #561
[ 68.163743] Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT)
[ 68.169566] task: ffffffc0bc8d0000 ti: ffffffc0bc8bc000 task.ti: ffffffc0bc8bc000
[ 68.177039] PC is at 0x0
[ 68.179577] LR is at usb_put_function+0x14/0x1c
....

BUG=chrome-os-partner:49140
TEST="setprop sys.usb.config accessory,audio_source" on A44 and then
switch back to default: "setprop sys.usb.config mtp,adb", no crash will
be seen.

Change-Id: I5b6141964aab861e86e3afb139ded02d4d122dab
Signed-off-by: Mark Kuo <mkuo@nvidia.com>
Reviewed-on: https://chromium-review.googlesource.com/321013
Commit-Ready: Andrew Bresticker <abrestic@chromium.org>
Tested-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-by: Andrew Bresticker <abrestic@chromium.org>
---
 drivers/usb/gadget/function/f_audio_source.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c
index 39645be93502..bcd817439dbf 100644
--- a/drivers/usb/gadget/function/f_audio_source.c
+++ b/drivers/usb/gadget/function/f_audio_source.c
@@ -583,6 +583,11 @@ static void audio_disable(struct usb_function *f)
 	usb_ep_disable(audio->in_ep);
 }
 
+static void audio_free_func(struct usb_function *f)
+{
+	/* no-op */
+}
+
 /*-------------------------------------------------------------------------*/
 
 static void audio_build_desc(struct audio_dev *audio)
@@ -827,6 +832,7 @@ static struct audio_dev _audio_dev = {
 		.set_alt = audio_set_alt,
 		.setup = audio_setup,
 		.disable = audio_disable,
+		.free_func = audio_free_func,
 	},
 	.lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock),
 	.idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs),

From 584d9e82958e16fd3f38c9206f4ef9b64f632a18 Mon Sep 17 00:00:00 2001
From: Mark Kuo <mkuo@nvidia.com>
Date: Mon, 11 Jan 2016 19:07:12 +0800
Subject: [PATCH 035/797] CHROMIUM: usb: gadget: f_accessory: add .raw_request
 callback

After upstream commit 3c86726cfe38952f0366f86acfbbb025813ec1c2,
.raw_request is mandatory in the hid_ll_driver structure, hence add an
empty raw_request() function.

BUG=chrome-os-partner:49140
TEST=none

Change-Id: Idd0bbe6960aad2c557376e4a24827d7e1df8e023
Signed-off-by: Mark Kuo <mkuo@nvidia.com>
Reviewed-on: https://chromium-review.googlesource.com/321038
Commit-Ready: Andrew Bresticker <abrestic@chromium.org>
Tested-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-by: Andrew Bresticker <abrestic@chromium.org>
---
 drivers/usb/gadget/function/f_accessory.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c
index 1be93a7ca4a1..c62123560143 100644
--- a/drivers/usb/gadget/function/f_accessory.c
+++ b/drivers/usb/gadget/function/f_accessory.c
@@ -404,12 +404,19 @@ static void acc_hid_close(struct hid_device *hid)
 {
 }
 
+static int acc_hid_raw_request(struct hid_device *hid, unsigned char reportnum,
+	__u8 *buf, size_t len, unsigned char rtype, int reqtype)
+{
+	return 0;
+}
+
 static struct hid_ll_driver acc_hid_ll_driver = {
 	.parse = acc_hid_parse,
 	.start = acc_hid_start,
 	.stop = acc_hid_stop,
 	.open = acc_hid_open,
 	.close = acc_hid_close,
+	.raw_request = acc_hid_raw_request,
 };
 
 static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev,

From abf8eef2ed2e5270047a2ae524d161e0f2c04350 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <Badhri@google.com>
Date: Tue, 6 Oct 2015 20:32:01 -0700
Subject: [PATCH 036/797] ANDROID: usb: gadget: Add support for MTP OS desc

Windows requires OS-specific descriptors for automatic
installation of drivers for MTP devices.

https://msdn.microsoft.com/en-us/library/windows/hardware/gg463179.aspx

BUG=24583401
BUG=chrome-os-partner:43409

Change-Id: I9397072ca3d183efbc9571c6cde3790f10d8851e
Signed-off-by: Badhri Jagan Sridharan <Badhri@google.com>
Reviewed-on: https://chromium-review.googlesource.com/304346
Commit-Ready: Andrew Bresticker <abrestic@chromium.org>
Tested-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-by: Andrew Bresticker <abrestic@chromium.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
---
 drivers/usb/gadget/function/f_mtp.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
index 7f5c390885fe..74195be9b054 100644
--- a/drivers/usb/gadget/function/f_mtp.c
+++ b/drivers/usb/gadget/function/f_mtp.c
@@ -346,6 +346,8 @@ struct mtp_instance {
 	struct usb_function_instance func_inst;
 	const char *name;
 	struct mtp_dev *dev;
+	char mtp_ext_compat_id[16];
+	struct usb_os_desc mtp_os_desc;
 };
 
 /* temporary variable used between mtp_open() and mtp_gadget_bind() */
@@ -1157,6 +1159,7 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f)
 	struct mtp_dev	*dev = func_to_mtp(f);
 	int			id;
 	int			ret;
+	struct mtp_instance *fi_mtp;
 
 	dev->cdev = cdev;
 	DBG(cdev, "mtp_function_bind dev: %p\n", dev);
@@ -1174,6 +1177,18 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f)
 		mtp_string_defs[INTERFACE_STRING_INDEX].id = ret;
 		mtp_interface_desc.iInterface = ret;
 	}
+
+	fi_mtp = container_of(f->fi, struct mtp_instance, func_inst);
+
+	if (cdev->use_os_string) {
+		f->os_desc_table = kzalloc(sizeof(*f->os_desc_table),
+					GFP_KERNEL);
+		if (!f->os_desc_table)
+			return -ENOMEM;
+		f->os_desc_n = 1;
+		f->os_desc_table[0].os_desc = &fi_mtp->mtp_os_desc;
+	}
+
 	/* allocate endpoints */
 	ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc,
 			&mtp_fullspeed_out_desc, &mtp_intr_desc);
@@ -1223,6 +1238,8 @@ mtp_function_unbind(struct usb_configuration *c, struct usb_function *f)
 	while ((req = mtp_req_get(dev, &dev->intr_idle)))
 		mtp_request_free(req, dev->ep_intr);
 	dev->state = STATE_OFFLINE;
+	kfree(f->os_desc_table);
+	f->os_desc_n = 0;
 }
 
 static int mtp_function_set_alt(struct usb_function *f,
@@ -1406,6 +1423,7 @@ static void mtp_free_inst(struct usb_function_instance *fi)
 	fi_mtp = to_fi_mtp(fi);
 	kfree(fi_mtp->name);
 	mtp_cleanup();
+	kfree(fi_mtp->mtp_os_desc.group.default_groups);
 	kfree(fi_mtp);
 }
 
@@ -1413,6 +1431,8 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config)
 {
 	struct mtp_instance *fi_mtp;
 	int ret = 0;
+	struct usb_os_desc *descs[1];
+	char *names[1];
 
 	fi_mtp = kzalloc(sizeof(*fi_mtp), GFP_KERNEL);
 	if (!fi_mtp)
@@ -1420,6 +1440,13 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config)
 	fi_mtp->func_inst.set_inst_name = mtp_set_inst_name;
 	fi_mtp->func_inst.free_func_inst = mtp_free_inst;
 
+	fi_mtp->mtp_os_desc.ext_compat_id = fi_mtp->mtp_ext_compat_id;
+	INIT_LIST_HEAD(&fi_mtp->mtp_os_desc.ext_prop);
+	descs[0] = &fi_mtp->mtp_os_desc;
+	names[0] = "MTP";
+	usb_os_desc_prepare_interf_dir(&fi_mtp->func_inst.group, 1,
+					descs, names, THIS_MODULE);
+
 	if (mtp_config) {
 		ret = mtp_setup_configfs(fi_mtp);
 		if (ret) {

From c4190692cd1fe8b3ba4a3953b3fc0d539d01560d Mon Sep 17 00:00:00 2001
From: Daniel Campello <campello@google.com>
Date: Mon, 20 Jul 2015 16:23:50 -0700
Subject: [PATCH 037/797] Included sdcardfs source code for kernel 3.0

This only includes the source code as is for kernel 3.0. The following
patches take care of porting this file system to version 3.10.

Change-Id: I09e76db77cd98a059053ba5b6fd88572a4b75b5b
Signed-off-by: Daniel Campello <campello@google.com>
---
 fs/Kconfig                 |   1 +
 fs/Makefile                |   5 +-
 fs/sdcardfs/Kconfig        |  18 +
 fs/sdcardfs/Makefile       |   7 +
 fs/sdcardfs/dentry.c       | 182 ++++++++
 fs/sdcardfs/derived_perm.c | 290 ++++++++++++
 fs/sdcardfs/file.c         | 357 +++++++++++++++
 fs/sdcardfs/hashtable.h    | 190 ++++++++
 fs/sdcardfs/inode.c        | 886 +++++++++++++++++++++++++++++++++++++
 fs/sdcardfs/lookup.c       | 386 ++++++++++++++++
 fs/sdcardfs/main.c         | 425 ++++++++++++++++++
 fs/sdcardfs/mmap.c         |  82 ++++
 fs/sdcardfs/multiuser.h    |  37 ++
 fs/sdcardfs/packagelist.c  | 458 +++++++++++++++++++
 fs/sdcardfs/sdcardfs.h     | 493 +++++++++++++++++++++
 fs/sdcardfs/strtok.h       |  75 ++++
 fs/sdcardfs/super.c        | 229 ++++++++++
 include/linux/namei.h      |   3 +
 include/uapi/linux/magic.h |   2 +
 19 files changed, 4124 insertions(+), 2 deletions(-)
 create mode 100644 fs/sdcardfs/Kconfig
 create mode 100644 fs/sdcardfs/Makefile
 create mode 100644 fs/sdcardfs/dentry.c
 create mode 100644 fs/sdcardfs/derived_perm.c
 create mode 100644 fs/sdcardfs/file.c
 create mode 100644 fs/sdcardfs/hashtable.h
 create mode 100644 fs/sdcardfs/inode.c
 create mode 100644 fs/sdcardfs/lookup.c
 create mode 100644 fs/sdcardfs/main.c
 create mode 100644 fs/sdcardfs/mmap.c
 create mode 100644 fs/sdcardfs/multiuser.h
 create mode 100644 fs/sdcardfs/packagelist.c
 create mode 100644 fs/sdcardfs/sdcardfs.h
 create mode 100644 fs/sdcardfs/strtok.h
 create mode 100644 fs/sdcardfs/super.c

diff --git a/fs/Kconfig b/fs/Kconfig
index 6ce72d8d1ee1..a5d2dc39ba07 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -199,6 +199,7 @@ if MISC_FILESYSTEMS
 source "fs/adfs/Kconfig"
 source "fs/affs/Kconfig"
 source "fs/ecryptfs/Kconfig"
+source "fs/sdcardfs/Kconfig"
 source "fs/hfs/Kconfig"
 source "fs/hfsplus/Kconfig"
 source "fs/befs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 79f522575cba..3b54070cd629 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -3,7 +3,7 @@
 #
 # 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
 # Rewritten to use lists instead of if-statements.
-# 
+#
 
 obj-y :=	open.o read_write.o file_table.o super.o \
 		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
@@ -59,7 +59,7 @@ obj-y				+= devpts/
 
 obj-$(CONFIG_PROFILING)		+= dcookies.o
 obj-$(CONFIG_DLM)		+= dlm/
- 
+
 # Do not add any filesystems before this line
 obj-$(CONFIG_FSCACHE)		+= fscache/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
@@ -81,6 +81,7 @@ obj-$(CONFIG_ISO9660_FS)	+= isofs/
 obj-$(CONFIG_HFSPLUS_FS)	+= hfsplus/ # Before hfs to find wrapped HFS+
 obj-$(CONFIG_HFS_FS)		+= hfs/
 obj-$(CONFIG_ECRYPT_FS)		+= ecryptfs/
+obj-$(CONFIG_SDCARD_FS)		+= sdcardfs/
 obj-$(CONFIG_VXFS_FS)		+= freevxfs/
 obj-$(CONFIG_NFS_FS)		+= nfs/
 obj-$(CONFIG_EXPORTFS)		+= exportfs/
diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
new file mode 100644
index 000000000000..657f4958e8d6
--- /dev/null
+++ b/fs/sdcardfs/Kconfig
@@ -0,0 +1,18 @@
+config SDCARD_FS
+	tristate "sdcard file system"
+	depends on EXPERIMENTAL
+	default n
+	help
+	  Sdcardfs is based on Wrapfs file system.
+
+config SDCARD_FS_FADV_NOACTIVE
+	bool "sdcardfs fadvise noactive support"
+	depends on FADV_NOACTIVE
+	default y
+	help
+	  Sdcardfs supports fadvise noactive mode.
+
+config SDCARD_FS_CI_SEARCH
+	tristate "sdcardfs case-insensitive search support"
+	depends on SDCARD_FS
+	default y
diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile
new file mode 100644
index 000000000000..b84fbb2b45a4
--- /dev/null
+++ b/fs/sdcardfs/Makefile
@@ -0,0 +1,7 @@
+SDCARDFS_VERSION="0.1"
+
+EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\"
+
+obj-$(CONFIG_SDCARD_FS) += sdcardfs.o
+
+sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
new file mode 100644
index 000000000000..4572a5403bb2
--- /dev/null
+++ b/fs/sdcardfs/dentry.c
@@ -0,0 +1,182 @@
+/*
+ * fs/sdcardfs/dentry.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/ctype.h"
+
+/*
+ * returns: -ERRNO if error (returned to user)
+ *          0: tell VFS to invalidate dentry
+ *          1: dentry is valid
+ */
+static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	int err = 1;
+	struct path parent_lower_path, lower_path;
+	struct dentry *parent_dentry = NULL;
+	struct dentry *parent_lower_dentry = NULL;
+	struct dentry *lower_cur_parent_dentry = NULL;
+	struct dentry *lower_dentry = NULL;
+
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	spin_lock(&dentry->d_lock);
+	if (IS_ROOT(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return 1;
+	}
+	spin_unlock(&dentry->d_lock);
+
+	/* check uninitialized obb_dentry and
+	 * whether the base obbpath has been changed or not */
+	if (is_obbpath_invalid(dentry)) {
+		d_drop(dentry);
+		return 0;
+	}
+
+	parent_dentry = dget_parent(dentry);
+	sdcardfs_get_lower_path(parent_dentry, &parent_lower_path);
+	sdcardfs_get_real_lower(dentry, &lower_path);
+	parent_lower_dentry = parent_lower_path.dentry;
+	lower_dentry = lower_path.dentry;
+	lower_cur_parent_dentry = dget_parent(lower_dentry);
+
+	spin_lock(&lower_dentry->d_lock);
+	if (d_unhashed(lower_dentry)) {
+		spin_unlock(&lower_dentry->d_lock);
+		d_drop(dentry);
+		err = 0;
+		goto out;
+	}
+	spin_unlock(&lower_dentry->d_lock);
+
+	if (parent_lower_dentry != lower_cur_parent_dentry) {
+		d_drop(dentry);
+		err = 0;
+		goto out;
+	}
+
+	if (dentry < lower_dentry) {
+		spin_lock(&dentry->d_lock);
+		spin_lock(&lower_dentry->d_lock);
+	} else {
+		spin_lock(&lower_dentry->d_lock);
+		spin_lock(&dentry->d_lock);
+	}
+
+	if (dentry->d_name.len != lower_dentry->d_name.len) {
+		__d_drop(dentry);
+		err = 0;
+	} else if (strncasecmp(dentry->d_name.name, lower_dentry->d_name.name,
+				dentry->d_name.len) != 0) {
+		__d_drop(dentry);
+		err = 0;
+	}
+
+	if (dentry < lower_dentry) {
+		spin_unlock(&lower_dentry->d_lock);
+		spin_unlock(&dentry->d_lock);
+	} else {
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&lower_dentry->d_lock);
+	}
+
+out:
+	dput(parent_dentry);
+	dput(lower_cur_parent_dentry);
+	sdcardfs_put_lower_path(parent_dentry, &parent_lower_path);
+	sdcardfs_put_real_lower(dentry, &lower_path);
+	return err;
+}
+
+static void sdcardfs_d_release(struct dentry *dentry)
+{
+	/* release and reset the lower paths */
+	if(has_graft_path(dentry)) {
+		sdcardfs_put_reset_orig_path(dentry);
+	}
+	sdcardfs_put_reset_lower_path(dentry);
+	free_dentry_private_data(dentry);
+	return;
+}
+
+static int sdcardfs_hash_ci(const struct dentry *dentry,
+				const struct inode *inode, struct qstr *qstr)
+{
+	/*
+	 * This function is copy of vfat_hashi.
+	 * FIXME Should we support national language?
+	 *       Refer to vfat_hashi()
+	 * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
+	 */
+	const unsigned char *name;
+	unsigned int len;
+	unsigned long hash;
+
+	name = qstr->name;
+	//len = vfat_striptail_len(qstr);
+	len = qstr->len;
+
+	hash = init_name_hash();
+	while (len--)
+		//hash = partial_name_hash(nls_tolower(t, *name++), hash);
+		hash = partial_name_hash(tolower(*name++), hash);
+	qstr->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+/*
+ * Case insensitive compare of two vfat names.
+ */
+static int sdcardfs_cmp_ci(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
+{
+	/* This function is copy of vfat_cmpi */
+	// FIXME Should we support national language?
+	//struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
+	//unsigned int alen, blen;
+
+	/* A filename cannot end in '.' or we treat it like it has none */
+	/*
+	alen = vfat_striptail_len(name);
+	blen = __vfat_striptail_len(len, str);
+	if (alen == blen) {
+		if (nls_strnicmp(t, name->name, str, alen) == 0)
+			return 0;
+	}
+	*/
+	if (name->len == len) {
+		if (strncasecmp(name->name, str, len) == 0)
+			return 0;
+	}
+	return 1;
+}
+
+const struct dentry_operations sdcardfs_ci_dops = {
+	.d_revalidate	= sdcardfs_d_revalidate,
+	.d_release	= sdcardfs_d_release,
+	.d_hash 	= sdcardfs_hash_ci,
+	.d_compare	= sdcardfs_cmp_ci,
+};
+
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
new file mode 100644
index 000000000000..00c33a471dcc
--- /dev/null
+++ b/fs/sdcardfs/derived_perm.c
@@ -0,0 +1,290 @@
+/*
+ * fs/sdcardfs/derived_perm.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/* Seed @child with @parent's derived identity; perm becomes PERM_INHERIT. */
+static void inherit_derived_state(struct inode *parent, struct inode *child)
+{
+	struct sdcardfs_inode_info *dst = SDCARDFS_I(child);
+	struct sdcardfs_inode_info *src = SDCARDFS_I(parent);
+
+	dst->perm = PERM_INHERIT;
+	dst->d_mode = src->d_mode;
+	dst->d_gid = src->d_gid;
+	dst->d_uid = src->d_uid;
+	dst->userid = src->userid;
+}
+
+/* Store an explicitly supplied derived state in @inode's private info. */
+void setup_derived_state(struct inode *inode, perm_t perm,
+                        userid_t userid, uid_t uid, gid_t gid, mode_t mode)
+{
+	struct sdcardfs_inode_info *priv = SDCARDFS_I(inode);
+
+	priv->d_mode = mode;
+	priv->d_gid = gid;
+	priv->d_uid = uid;
+	priv->userid = userid;
+	priv->perm = perm;
+}
+
+/* Compute @dentry's derived perm/userid/uid/gid/mode from @parent's state and its name. */
+void get_derived_permission(struct dentry *parent, struct dentry *dentry)
+{
+	struct sdcardfs_sb_info *sb_priv = SDCARDFS_SB(dentry->d_sb);
+	struct sdcardfs_inode_info *self = SDCARDFS_I(dentry->d_inode);
+	struct sdcardfs_inode_info *up = SDCARDFS_I(parent->d_inode);
+	const unsigned char *leaf = dentry->d_name.name;
+	appid_t app;
+
+	/*
+	 * Start from the parent's derived state.  It lives in sdcardfs'
+	 * private inode fields because the ordinary inode attributes are
+	 * overwritten with the lower inode's; the derived values are applied
+	 * at the last stage of each system call by fix_derived_permission().
+	 */
+	inherit_derived_state(parent->d_inode, dentry->d_inode);
+
+	/* With DERIVE_NONE plain inheritance is the whole story. */
+	if (sb_priv->options.derive == DERIVE_NONE)
+		return;
+
+	/* Refine by the parent's role and this entry's name (case-insensitive). */
+	switch (up->perm) {
+	case PERM_INHERIT:
+		/* Nothing to add to the inheritance done above. */
+		break;
+	case PERM_LEGACY_PRE_ROOT:
+		/* Legacy internal layout places users at top level. */
+		self->perm = PERM_ROOT;
+		self->userid = simple_strtoul(leaf, NULL, 10);
+		break;
+	case PERM_ROOT:
+		/* Assume masked off by default. */
+		self->d_mode = 00770;
+		if (!strcasecmp(leaf, "Android")) {
+			/* App-specific directories inside; let anyone traverse. */
+			self->perm = PERM_ANDROID;
+			self->d_mode = 00771;
+			break;
+		}
+		/* Media groups are only assigned when split_perms is set. */
+		if (!sb_priv->options.split_perms)
+			break;
+		if (!strcasecmp(leaf, "DCIM") ||
+		    !strcasecmp(leaf, "Pictures")) {
+			self->d_gid = AID_SDCARD_PICS;
+		} else if (!strcasecmp(leaf, "Alarms") ||
+			   !strcasecmp(leaf, "Movies") ||
+			   !strcasecmp(leaf, "Music") ||
+			   !strcasecmp(leaf, "Notifications") ||
+			   !strcasecmp(leaf, "Podcasts") ||
+			   !strcasecmp(leaf, "Ringtones")) {
+			self->d_gid = AID_SDCARD_AV;
+		}
+		break;
+	case PERM_ANDROID:
+		if (!strcasecmp(leaf, "data")) {
+			/* App-specific directories inside; let anyone traverse. */
+			self->perm = PERM_ANDROID_DATA;
+			self->d_mode = 00771;
+		} else if (!strcasecmp(leaf, "obb")) {
+			/* App-specific directories inside; let anyone traverse. */
+			self->perm = PERM_ANDROID_OBB;
+			self->d_mode = 00771;
+			/* FIXME: "single OBB directory is always shared" is
+			 * not implemented yet. */
+		} else if (!strcasecmp(leaf, "user")) {
+			/*
+			 * User directories must only be accessible to system,
+			 * protected by sdcard_all. Zygote will bind mount the
+			 * appropriate user-specific path.
+			 */
+			self->perm = PERM_ANDROID_USER;
+			self->d_gid = AID_SDCARD_ALL;
+			self->d_mode = 00770;
+		}
+		break;
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_OBB:
+		/* Same policy for both: d_uid follows the appid found for the name. */
+		app = get_appid(sb_priv->pkgl_id, leaf);
+		if (app != 0)
+			self->d_uid = multiuser_get_uid(up->userid, app);
+		self->d_mode = 00770;
+		break;
+	case PERM_ANDROID_USER:
+		/* Root of a secondary user */
+		self->perm = PERM_ROOT;
+		self->userid = simple_strtoul(leaf, NULL, 10);
+		self->d_gid = AID_SDCARD_R;
+		self->d_mode = 00771;
+		break;
+	}
+}
+
+/* main function for updating derived permission */
+inline void update_derived_permission(struct dentry *dentry)
+{
+	struct dentry *up;
+
+	/* A missing dentry, or one without an inode, has nothing to update. */
+	if (dentry == NULL || dentry->d_inode == NULL) {
+		printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__);
+		return;
+	}
+	/* FIXME:
+	 * 1. need to check whether the dentry is updated or not
+	 * 2. remove the root dentry update
+	 */
+	if (!IS_ROOT(dentry)) {
+		/* Re-derive from the parent while holding a reference to it. */
+		up = dget_parent(dentry);
+		if (up != NULL) {
+			get_derived_permission(up, dentry);
+			dput(up);
+		}
+	}
+	fix_derived_permission(dentry->d_inode);
+}
+
+int need_graft_path(struct dentry *dentry)
+{
+	int graft = 0;
+	struct dentry *up = dget_parent(dentry);
+	struct sdcardfs_inode_info *up_info = SDCARDFS_I(up->d_inode);
+	struct sdcardfs_sb_info *sb_priv = SDCARDFS_SB(dentry->d_sb);
+
+	/* Only an "obb" entry whose parent is PERM_ANDROID can need grafting. */
+	if (up_info->perm != PERM_ANDROID ||
+			strcasecmp(dentry->d_name.name, "obb") != 0)
+		goto out;
+
+	/* /Android/obb is the base obbpath of DERIVED_UNIFIED */
+	if (sb_priv->options.derive != DERIVE_UNIFIED || up_info->userid != 0)
+		graft = 1;
+out:
+	dput(up);
+	return graft;
+}
+
+int is_obbpath_invalid(struct dentry *dent)
+{
+	int ret = 0, need_put = 0;
+	struct sdcardfs_dentry_info *di = SDCARDFS_D(dent);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb);
+	char *path_buf, *obbpath_s;
+	struct path held;
+	/* Returns 1 if an obb dentry (orig_path set) has a missing, unhashed
+	 * or unresolvable lower_path, or one that no longer matches
+	 * sbi->obbpath_s; 0 otherwise.  See also sdcardfs_mkdir(). */
+	spin_lock(&di->lock);
+	if(di->orig_path.dentry) {
+		if(!di->lower_path.dentry) {
+			ret = 1;
+		} else {
+			/* pin lower_path; the ref is dropped after unlocking */
+			path_get(&di->lower_path);
+			pathcpy(&held, &di->lower_path);
+			need_put = 1;
+			path_buf = kmalloc(PATH_MAX, GFP_ATOMIC);
+			if(!path_buf) {
+				ret = 1;
+				printk(KERN_ERR "sdcardfs: "
+					"fail to allocate path_buf in %s.\n", __func__);
+			} else {
+				/* d_path() can return an ERR_PTR; never compare that */
+				obbpath_s = d_path(&held, path_buf, PATH_MAX);
+				if (IS_ERR(obbpath_s) || d_unhashed(held.dentry) ||
+					strcasecmp(sbi->obbpath_s, obbpath_s))
+					ret = 1;
+				kfree(path_buf);
+			}
+		}
+	}
+	spin_unlock(&di->lock);
+	if (need_put)
+		path_put(&held); /* may sleep, so only outside the spinlock */
+	return ret;
+}
+
+int is_base_obbpath(struct dentry *dentry)
+{
+	int is_base = 0;
+	struct dentry *up = dget_parent(dentry);
+	struct sdcardfs_inode_info *up_info = SDCARDFS_I(up->d_inode);
+	struct sdcardfs_sb_info *sb_priv = SDCARDFS_SB(dentry->d_sb);
+
+	spin_lock(&SDCARDFS_D(dentry)->lock);
+	/* Only an entry named "obb" (any case) can be the base obbpath. */
+	if (strcasecmp(dentry->d_name.name, "obb") != 0)
+		goto unlock;
+
+	if (up_info->perm == PERM_LEGACY_PRE_ROOT) {
+		/* DERIVED_LEGACY */
+		is_base = 1;
+	} else if (up_info->perm == PERM_ANDROID) {
+		/* DERIVED_UNIFIED :/Android/obb is the base obbpath */
+		is_base = (sb_priv->options.derive == DERIVE_UNIFIED &&
+				up_info->userid == 0);
+	}
+unlock:
+	spin_unlock(&SDCARDFS_D(dentry)->lock);
+	dput(up);
+	return is_base;
+}
+
+/* Store @lower_path as the dentry's orig_path, then redirect @lower_path
+ * to the base obbpath named by sbi->obbpath_s.
+ * On error @lower_path itself is left unchanged (orig_path is still set).
+ * returns: -ERRNO if error (0: no error) */
+int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
+{
+	struct sdcardfs_sb_info *sb_priv = SDCARDFS_SB(dentry->d_sb);
+	struct path base;
+	int rc;
+
+	/* A local obb dentry must have its own orig_path to support rmdir
+	 * and mkdir of itself. Usually, we expect that the sbi->obbpath
+	 * is available at this stage. */
+	sdcardfs_set_orig_path(dentry, lower_path);
+
+	rc = kern_path(sb_priv->obbpath_s,
+			LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &base);
+	if (rc) {
+		/* if the sbi->obbpath is not available, we can optionally
+		 * setup the lower_path with its orig_path.
+		 * but, the current implementation just returns an error
+		 * because the sdcard daemon also regards this case as
+		 * a lookup fail. */
+		printk(KERN_INFO "sdcardfs: "
+				"the sbi->obbpath is not available\n");
+		return rc;
+	}
+
+	/* the obbpath base has been found */
+	printk(KERN_INFO "sdcardfs: "
+			"the sbi->obbpath is found\n");
+	pathcpy(lower_path, &base);
+	return 0;
+}
+
+
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
new file mode 100644
index 000000000000..bcacb947c874
--- /dev/null
+++ b/fs/sdcardfs/file.c
@@ -0,0 +1,357 @@
+/*
+ * fs/sdcardfs/file.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+#include <linux/backing-dev.h>
+#endif
+
+static ssize_t sdcardfs_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	struct file *lower_file = sdcardfs_lower_file(file);
+	struct dentry *dentry = file->f_path.dentry;
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+	struct backing_dev_info *bdi;
+#endif
+	int err;
+
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+	/* FMODE_NOACTIVE on our file only: mirror it onto the lower file and
+	 * set the lower readahead to twice the bdi's ra_pages. */
+	if ((file->f_mode & FMODE_NOACTIVE) &&
+	    !(lower_file->f_mode & FMODE_NOACTIVE)) {
+		bdi = lower_file->f_mapping->backing_dev_info;
+		lower_file->f_ra.ra_pages = bdi->ra_pages * 2;
+		spin_lock(&lower_file->f_lock);
+		lower_file->f_mode |= FMODE_NOACTIVE;
+		spin_unlock(&lower_file->f_lock);
+	}
+#endif
+
+	err = vfs_read(lower_file, buf, count, ppos);
+	if (err < 0)
+		return err;
+
+	/* update our inode atime upon a successful lower read */
+	fsstack_copy_attr_atime(dentry->d_inode,
+				lower_file->f_path.dentry->d_inode);
+	return err;
+}
+
+static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *lower_inode;
+	struct file *lower_file;
+	int err;
+
+	/* check disk space */
+	if (!check_min_free_space(dentry, count, 0)) {
+		printk(KERN_INFO "No minimum free space.\n");
+		return -ENOSPC;
+	}
+
+	lower_file = sdcardfs_lower_file(file);
+	err = vfs_write(lower_file, buf, count, ppos);
+	if (err < 0)
+		return err;
+
+	/* update our inode times+sizes upon a successful lower write */
+	lower_inode = lower_file->f_path.dentry->d_inode;
+	fsstack_copy_inode_size(dentry->d_inode, lower_inode);
+	fsstack_copy_attr_times(dentry->d_inode, lower_inode);
+	return err;
+}
+
+static int sdcardfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct file *lower_file = sdcardfs_lower_file(file);
+	int err;
+
+	/* Run the lower readdir at our position, then adopt its new one. */
+	lower_file->f_pos = file->f_pos;
+	err = vfs_readdir(lower_file, filldir, dirent);
+	file->f_pos = lower_file->f_pos;
+	if (err < 0)
+		return err;
+	/* copy the atime */
+	fsstack_copy_attr_atime(file->f_path.dentry->d_inode,
+				lower_file->f_path.dentry->d_inode);
+	return err;
+}
+
+static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	struct file *lower_file = sdcardfs_lower_file(file);
+
+	/*
+	 * Forward to the lower file's ->unlocked_ioctl; when there is no
+	 * lower file, no f_op or no such method, answer -ENOTTY.
+	 */
+	/* XXX: use vfs_ioctl if/when VFS exports it */
+	if (!lower_file || !lower_file->f_op)
+		return -ENOTTY;
+	if (!lower_file->f_op->unlocked_ioctl)
+		return -ENOTTY;
+
+	return lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+}
+
+#ifdef CONFIG_COMPAT
+static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	struct file *lower_file = sdcardfs_lower_file(file);
+
+	/*
+	 * Forward to the lower file's ->compat_ioctl; when there is no
+	 * lower file, no f_op or no such method, answer -ENOTTY.
+	 */
+	/* XXX: use vfs_ioctl if/when VFS exports it */
+	if (!lower_file || !lower_file->f_op)
+		return -ENOTTY;
+	if (!lower_file->f_op->compat_ioctl)
+		return -ENOTTY;
+
+	return lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+}
+#endif
+
+static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err = 0;
+	bool willwrite;
+	struct file *lower_file;
+	const struct vm_operations_struct *saved_vm_ops = NULL;
+	/* mmap handler: installs sdcardfs_vm_ops/sdcardfs_aops and records the lower file's vm_ops. */
+	/* true only if both VM_SHARED and VM_WRITE are set; writes might be deferred to ->writepage */
+	willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
+
+	/*
+	 * File systems which do not implement ->writepage may use
+	 * generic_file_readonly_mmap as their ->mmap op.  If you call
+	 * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
+	 * But we cannot call the lower ->mmap op, so we can't tell that
+	 * writeable mappings won't work.  Therefore, our only choice is to
+	 * check if the lower file system supports the ->writepage, and if
+	 * not, return EINVAL (the same error that
+	 * generic_file_readonly_mmap returns in that case).
+	 */
+	lower_file = sdcardfs_lower_file(file);
+	if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
+		err = -EINVAL;
+		printk(KERN_ERR "sdcardfs: lower file system does not "
+		       "support writeable mmap\n");
+		goto out;
+	}
+
+	/*
+	 * find and save lower vm_ops: done only while none are saved yet, by
+	 * calling the lower ->mmap and then undoing its mapping with do_munmap().
+	 * XXX: the VFS should have a cleaner way of finding the lower vm_ops
+	 */
+	if (!SDCARDFS_F(file)->lower_vm_ops) {
+		err = lower_file->f_op->mmap(lower_file, vma); /* NOTE(review): ->mmap is not NULL-checked */
+		if (err) {
+			printk(KERN_ERR "sdcardfs: lower mmap failed %d\n", err);
+			goto out;
+		}
+		saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
+		err = do_munmap(current->mm, vma->vm_start,
+				vma->vm_end - vma->vm_start);
+		if (err) {
+			printk(KERN_ERR "sdcardfs: do_munmap failed %d\n", err);
+			goto out;
+		}
+	}
+
+	/*
+	 * Next 3 lines are all I need from generic_file_mmap.  I definitely
+	 * don't want its test for ->readpage which returns -ENOEXEC.
+	 */
+	file_accessed(file);
+	vma->vm_ops = &sdcardfs_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+
+	file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
+	if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
+		SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;
+
+out:
+	return err;
+}
+
+/* Open the lower file for @file after checking the caller's access; returns 0 or -errno. */
+static int sdcardfs_open(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+	struct path lower_path;
+	struct dentry *dentry = file->f_path.dentry;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	int has_rw;
+
+	/* don't open unhashed/deleted files */
+	if (d_unhashed(dentry)) {
+		err = -ENOENT;
+		goto out_err;
+	}
+
+	has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+
+	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
+				sbi->options.derive,
+				open_flags_to_access_mode(file->f_flags), has_rw)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+                         "	dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		err = -EACCES;
+		goto out_err;
+	}
+
+	/* save current_cred and override it.
+	 * NOTE(review): if OVERRIDE_CRED() can return early, @parent leaks - confirm. */
+	OVERRIDE_CRED(sbi, saved_cred);
+
+	file->private_data =
+		kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL);
+	if (!SDCARDFS_F(file)) {
+		err = -ENOMEM;
+		goto out_revert_cred;
+	}
+	/* open lower object and link sdcardfs's file struct to lower's */
+	sdcardfs_get_lower_path(file->f_path.dentry, &lower_path);
+	lower_file = dentry_open(lower_path.dentry, lower_path.mnt,
+				 file->f_flags, current_cred());
+	if (IS_ERR(lower_file)) {
+		err = PTR_ERR(lower_file);
+		lower_file = sdcardfs_lower_file(file);
+		if (lower_file) {
+			sdcardfs_set_lower_file(file, NULL);
+			fput(lower_file); /* fput calls dput for lower_dentry */
+		}
+	} else {
+		sdcardfs_set_lower_file(file, lower_file);
+	}
+	if (err) {
+		kfree(SDCARDFS_F(file));
+		file->private_data = NULL; /* no dangling pointer to freed info */
+	} else {
+		fsstack_copy_attr_all(inode, sdcardfs_lower_inode(inode));
+		fix_derived_permission(inode);
+	}
+out_revert_cred:
+	REVERT_CRED(saved_cred);
+out_err:
+	dput(parent);
+	return err;
+}
+
+static int sdcardfs_flush(struct file *file, fl_owner_t id)
+{
+	struct file *lower_file = sdcardfs_lower_file(file);
+
+	/* No lower file, no f_op or no ->flush: nothing to do, report 0. */
+	if (!lower_file || !lower_file->f_op || !lower_file->f_op->flush)
+		return 0;
+
+	/* Hand the flush down and return the lower result as ours. */
+	return lower_file->f_op->flush(lower_file, id);
+}
+
+/* release all lower object references & free the file info structure */
+static int sdcardfs_file_release(struct inode *inode, struct file *file)
+{
+	struct file *lower = sdcardfs_lower_file(file);
+
+	/* Detach the lower file first, then drop our reference to it. */
+	if (lower != NULL) {
+		sdcardfs_set_lower_file(file, NULL);
+		fput(lower);
+	}
+
+	kfree(SDCARDFS_F(file));
+	return 0;
+}
+
+static int
+sdcardfs_fsync(struct file *file, int datasync)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct file *lower_file = sdcardfs_lower_file(file);
+	struct path lower_path;
+	int err;
+
+	/* Hold the lower path across the lower fsync. */
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_fsync(lower_file, datasync);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+
+	return err;
+}
+
+static int sdcardfs_fasync(int fd, struct file *file, int flag)
+{
+	struct file *lower_file = sdcardfs_lower_file(file);
+
+	/* Without a lower ->fasync there is nothing to do; report 0. */
+	if (!lower_file->f_op || !lower_file->f_op->fasync)
+		return 0;
+
+	/* Otherwise delegate, passing fd and flag through unchanged. */
+	return lower_file->f_op->fasync(fd, lower_file, flag);
+}
+
+const struct file_operations sdcardfs_main_fops = {	/* full set; sdcardfs_dir_fops is the trimmed one */
+	.llseek		= generic_file_llseek,
+	.read		= sdcardfs_read,	/* vfs_read() on the lower file */
+	.write		= sdcardfs_write,	/* checks min free space, then vfs_write() */
+	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= sdcardfs_compat_ioctl,
+#endif
+	.mmap		= sdcardfs_mmap,
+	.open		= sdcardfs_open,
+	.flush		= sdcardfs_flush,
+	.release	= sdcardfs_file_release,
+	.fsync		= sdcardfs_fsync,
+	.fasync		= sdcardfs_fasync,
+};
+
+/* trimmed directory operations: no write/mmap, and read is generic_read_dir */
+const struct file_operations sdcardfs_dir_fops = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= sdcardfs_readdir,	/* vfs_readdir() on the lower file */
+	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= sdcardfs_compat_ioctl,
+#endif
+	.open		= sdcardfs_open,
+	.release	= sdcardfs_file_release,
+	.flush		= sdcardfs_flush,
+	.fsync		= sdcardfs_fsync,
+	.fasync		= sdcardfs_fasync,
+};
diff --git a/fs/sdcardfs/hashtable.h b/fs/sdcardfs/hashtable.h
new file mode 100644
index 000000000000..1e770f3df148
--- /dev/null
+++ b/fs/sdcardfs/hashtable.h
@@ -0,0 +1,190 @@
+/*
+ * Statically sized hash table implementation
+ * (C) 2012  Sasha Levin <levinsasha928@gmail.com>
+ */
+
+#ifndef _LINUX_HASHTABLE_H
+#define _LINUX_HASHTABLE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/rculist.h>
+/* Define a hashtable of 2^bits buckets, each initialised with HLIST_HEAD_INIT. */
+#define DEFINE_HASHTABLE(name, bits)                                            \
+        struct hlist_head name[1 << (bits)] =                                   \
+                        { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
+/* Declare (without initialising) a hashtable of 2^bits buckets. */
+#define DECLARE_HASHTABLE(name, bits)                                           \
+        struct hlist_head name[1 << (bits)]
+/* Bucket count of a table and its ilog2; both need the array itself, not a pointer. */
+#define HASH_SIZE(name) (ARRAY_SIZE(name))
+#define HASH_BITS(name) ilog2(HASH_SIZE(name))
+
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */
+#define hash_min(val, bits)                                                     \
+        (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits))
+
+static inline void __hash_init(struct hlist_head *ht, unsigned int sz)
+{
+        struct hlist_head *bucket;
+        /* Reset each of the sz buckets to an empty list head. */
+        for (bucket = ht; bucket != ht + sz; bucket++)
+                INIT_HLIST_HEAD(bucket);
+}
+
+/**
+ * hash_init - initialize a hash table
+ * @hashtable: hashtable to be initialized
+ *
+ * Calculates the size of the hashtable from the given parameter and passes
+ * it to __hash_init(), which initializes every bucket.
+ *
+ * This has to be a macro since HASH_SIZE() will not work on pointers: it
+ * derives the bucket count from the array itself (ARRAY_SIZE).
+ */
+#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_add - add an object to a hashtable
+ * @hashtable: hashtable to add to (the array itself, HASH_BITS() is applied)
+ * @node: the &struct hlist_node of the object to be added
+ * @key: the key of the object to be added; hash_min() of it picks the bucket
+ */
+#define hash_add(hashtable, node, key)                                          \
+        hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])
+
+/**
+ * hash_add_rcu - add an object to a rcu enabled hashtable
+ * @hashtable: hashtable to add to (the array itself, HASH_BITS() is applied)
+ * @node: the &struct hlist_node of the object to be added
+ * @key: the key of the object to be added; hash_min() of it picks the bucket
+ */
+#define hash_add_rcu(hashtable, node, key)                                      \
+        hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])
+
+/**
+ * hash_hashed - true unless hlist_unhashed() reports @node as unhashed
+ * @node: the &struct hlist_node of the object to be checked
+ */
+static inline bool hash_hashed(struct hlist_node *node)
+{
+        return hlist_unhashed(node) ? false : true;
+}
+
+static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz)
+{
+        const struct hlist_head *bucket = ht;
+        const struct hlist_head *stop = ht + sz;
+
+        /* The table is empty only if every one of its sz buckets is. */
+        for (; bucket != stop; bucket++)
+                if (!hlist_empty(bucket))
+                        return false;
+        return true;
+}
+
+/**
+ * hash_empty - check whether a hashtable is empty
+ * @hashtable: hashtable to check
+ *
+ * This has to be a macro since HASH_SIZE() will not work on pointers: it
+ * derives the bucket count from the array itself (ARRAY_SIZE).
+ */
+#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_del - remove an object from a hashtable (wraps hlist_del_init())
+ * @node: &struct hlist_node of the object to remove
+ */
+static inline void hash_del(struct hlist_node *node)
+{
+        hlist_del_init(node);
+}
+
+/**
+ * hash_del_rcu - remove an object from a rcu enabled hashtable (wraps hlist_del_init_rcu())
+ * @node: &struct hlist_node of the object to remove
+ */
+static inline void hash_del_rcu(struct hlist_node *node)
+{
+        hlist_del_init_rcu(node);
+}
+
+/**
+ * hash_for_each - iterate over a hashtable
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each(name, bkt, obj, member, pos)                           \
+        for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+                        (bkt)++)\
+                hlist_for_each_entry(obj, pos, &name[bkt], member)
+
+/**
+ * hash_for_each_rcu - iterate over a rcu enabled hashtable
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each_rcu(name, bkt, obj, member)                       \
+        for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+                        (bkt)++)\
+                hlist_for_each_entry_rcu(obj, &name[bkt], member)
+
+/**
+ * hash_for_each_safe - iterate over a hashtable safe against entry removal
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @tmp: a &struct used for temporary storage
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @pos: cursor passed to hlist_for_each_entry_safe() (presumably a &struct hlist_node *)
+ */
+#define hash_for_each_safe(name, bkt, tmp, obj, member, pos)                 \
+        for ((bkt) = 0, obj = NULL; (bkt) < HASH_SIZE(name);\
+                        (bkt)++)\
+                hlist_for_each_entry_safe(obj, pos, tmp, &name[bkt], member)
+
+/**
+ * hash_for_each_possible - iterate over all objects hashing to @key's bucket
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ * @pos: cursor passed to hlist_for_each_entry() (presumably a &struct hlist_node *)
+ */
+#define hash_for_each_possible(name, obj, member, key, pos)                  \
+        hlist_for_each_entry(obj, pos, &name[hash_min(key, HASH_BITS(name))], member)
+
+/**
+ * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
+ * same bucket in an rcu enabled hashtable
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ * NOTE(review): no @pos cursor is passed here, unlike hash_for_each_possible().
+ */
+#define hash_for_each_possible_rcu(name, obj, member, key)              \
+        hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\
+                member)
+
+/**
+ * hash_for_each_possible_safe - like hash_for_each_possible, safe against removals
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @tmp: a &struct used for temporary storage
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ * NOTE(review): no @pos cursor is passed here, unlike hash_for_each_safe().
+ */
+#define hash_for_each_possible_safe(name, obj, tmp, member, key)        \
+        hlist_for_each_entry_safe(obj, tmp,\
+                &name[hash_min(key, HASH_BITS(name))], member)
+
+
+#endif
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
new file mode 100644
index 000000000000..e8ed04250ed1
--- /dev/null
+++ b/fs/sdcardfs/inode.c
@@ -0,0 +1,886 @@
+/*
+ * fs/sdcardfs/inode.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
+const struct cred * override_fsids(struct sdcardfs_sb_info* sbi)
+{
+	struct cred *low_cred = prepare_creds();
+
+	/* Returns NULL when no credential copy could be prepared. */
+	if (low_cred == NULL)
+		return NULL;
+
+	/* Only the fs ids are changed, to the mount's fs_low_uid/gid. */
+	low_cred->fsuid = sbi->options.fs_low_uid;
+	low_cred->fsgid = sbi->options.fs_low_gid;
+
+	/* Install the copy; the value handed back by override_creds()
+	 * is what revert_fsids() must later be given. */
+	return override_creds(low_cred);
+}
+
+/* Do not directly use this function, use REVERT_CRED() instead. */
+void revert_fsids(const struct cred * old_cred)
+{
+	/* Remember the creds in force now so our reference can be dropped. */
+	const struct cred *overridden = current->cred;
+
+	revert_creds(old_cred);
+	put_cred(overridden);
+}
+
+static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
+			 int mode, struct nameidata *nd)
+{
+	int err = 0;
+	struct dentry *lower_dentry;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path, saved_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	/* Create a file: check the caller's access, then vfs_create() it on the lower fs. */
+	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	err = mnt_want_write(lower_path.mnt);
+	if (err)
+		goto out_unlock;
+	/* point nd->path at the lower path for the lower vfs_create(); restored right after */
+	pathcpy(&saved_path, &nd->path);
+	pathcpy(&nd->path, &lower_path);
+
+	/* keep only the file-type bits of mode and force the rest to 0664 */
+	mode = (mode & S_IFMT) | 00664;
+	err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd);
+
+	pathcpy(&nd->path, &saved_path);
+	if (err)
+		goto out;
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+
+out:
+	mnt_drop_write(lower_path.mnt);
+out_unlock:
+	unlock_dir(lower_parent_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	REVERT_CRED(saved_cred);
+out_eacces:
+	return err;
+}
+
+#if 0 /* compiled out; OVERRIDE_CRED()/REVERT_CRED() here take fewer arguments than in the live code */
+static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir,
+		       struct dentry *new_dentry)
+{
+	struct dentry *lower_old_dentry;
+	struct dentry *lower_new_dentry;
+	struct dentry *lower_dir_dentry;
+	u64 file_size_save;
+	int err;
+	struct path lower_old_path, lower_new_path;
+
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
+
+	file_size_save = i_size_read(old_dentry->d_inode);
+	sdcardfs_get_lower_path(old_dentry, &lower_old_path);
+	sdcardfs_get_lower_path(new_dentry, &lower_new_path);
+	lower_old_dentry = lower_old_path.dentry;
+	lower_new_dentry = lower_new_path.dentry;
+	lower_dir_dentry = lock_parent(lower_new_dentry);
+
+	err = mnt_want_write(lower_new_path.mnt);
+	if (err)
+		goto out_unlock;
+
+	err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
+		       lower_new_dentry);
+	if (err || !lower_new_dentry->d_inode)
+		goto out;
+
+	err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path);
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, lower_new_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_new_dentry->d_inode);
+	old_dentry->d_inode->i_nlink =
+		  sdcardfs_lower_inode(old_dentry->d_inode)->i_nlink;
+	i_size_write(new_dentry->d_inode, file_size_save);
+out:
+	mnt_drop_write(lower_new_path.mnt);
+out_unlock:
+	unlock_dir(lower_dir_dentry);
+	sdcardfs_put_lower_path(old_dentry, &lower_old_path);
+	sdcardfs_put_lower_path(new_dentry, &lower_new_path);
+	REVERT_CRED();
+	return err;
+}
+#endif
+
+static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct inode *lower_dir_inode = sdcardfs_lower_inode(dir);
+	struct dentry *lower_dir_dentry;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	/* Unlink: check the caller's access, then vfs_unlink() the lower dentry. */
+	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	dget(lower_dentry); /* extra reference, dropped by the dput() below */
+	lower_dir_dentry = lock_parent(lower_dentry);
+
+	err = mnt_want_write(lower_path.mnt);
+	if (err)
+		goto out_unlock;
+	err = vfs_unlink(lower_dir_inode, lower_dentry);
+
+	/*
+	 * Note: unlinking on top of NFS can cause silly-renamed files.
+	 * Trying to delete such files results in EBUSY from NFS
+	 * below.  Silly-renamed files will get deleted by NFS later on, so
+	 * we just need to detect them here and treat such EBUSY errors as
+	 * if the upper file was successfully deleted.
+	 */
+	if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		err = 0;
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, lower_dir_inode);
+	fsstack_copy_inode_size(dir, lower_dir_inode);
+	dentry->d_inode->i_nlink =
+		  sdcardfs_lower_inode(dentry->d_inode)->i_nlink;
+	dentry->d_inode->i_ctime = dir->i_ctime;
+	d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */
+out:
+	mnt_drop_write(lower_path.mnt);
+out_unlock:
+	unlock_dir(lower_dir_dentry);
+	dput(lower_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	REVERT_CRED(saved_cred);
+out_eacces:
+	return err;
+}
+
+#if 0
+static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry,
+			  const char *symname)
+{
+	int err = 0;
+	struct dentry *lower_dentry;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	err = mnt_want_write(lower_path.mnt);
+	if (err)
+		goto out_unlock;
+	err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname);
+	if (err)
+		goto out;
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+
+out:
+	mnt_drop_write(lower_path.mnt);
+out_unlock:
+	unlock_dir(lower_parent_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	REVERT_CRED();
+	return err;
+}
+#endif
+
+static int touch(char *abs_path, mode_t mode)
+{
+	/* O_EXCL create: an already existing file is reported as success */
+	struct file *created = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode);
+	long open_err = IS_ERR(created) ? PTR_ERR(created) : 0;
+
+	if (!open_err) {
+		filp_close(created, current->files);
+		return 0;
+	}
+	if (open_err == -EEXIST)
+		return 0;
+	printk(KERN_ERR "sdcardfs: failed to open(%s): %ld\n", abs_path, open_err);
+	return open_err;
+}
+
+static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int err = 0;
+	int make_nomedia_in_obb = 0;
+	struct dentry *lower_dentry;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	struct sdcardfs_inode_info *pi = SDCARDFS_I(dir);
+	char *page_buf;
+	char *nomedia_dir_name;
+	char *nomedia_fullpath;
+	int fullpath_namelen;
+	int touch_err = 0;
+	/* Create the lower directory for @dentry (permissions forced to 0775) and interpose it. */
+	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it; undone by REVERT_CRED() below */
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred);
+
+	/* check disk space */
+	if (!check_min_free_space(dentry, 0, 1)) {
+		printk(KERN_INFO "sdcardfs: No minimum free space.\n");
+		err = -ENOSPC;
+		goto out_revert;
+	}
+
+	/* the lower_dentry is negative here */
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_parent_dentry = lock_parent(lower_dentry);	/* released by unlock_dir() */
+
+	err = mnt_want_write(lower_path.mnt);
+	if (err)
+		goto out_unlock;
+
+	/* keep only the file-type bits of mode and force the permission bits to 0775 */
+	mode = (mode & S_IFMT) | 00775;
+	err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode);
+
+	if (err)
+		goto out;
+
+	/* if it is a local obb dentry, setup it with the base obbpath */
+	if(need_graft_path(dentry)) {
+		/* NOTE: this err is overwritten by sdcardfs_interpose() below, so it is never returned */
+		err = setup_obb_dentry(dentry, &lower_path);
+		if(err) {
+			/* if the sbi->obbpath is not available, the lower_path won't be
+			 * changed by setup_obb_dentry() but the lower path is saved to
+			 * its orig_path. this dentry will be revalidated later.
+			 * but now, the lower_path should be NULL */
+			sdcardfs_put_reset_lower_path(dentry);
+
+			/* the newly created lower path which saved to its orig_path or
+			 * the lower_path is the base obbpath.
+			 * therefore, an additional path_get is required */
+			path_get(&lower_path);
+		} else
+			make_nomedia_in_obb = 1;
+	}
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+	if (err)
+		goto out;
+	/* err is 0 from here on; none of the .nomedia steps below changes it */
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+	/* update number of links on parent directory */
+	dir->i_nlink = sdcardfs_lower_inode(dir)->i_nlink;
+	/* unified mode: an "obb" dir whose parent has PERM_ANDROID and userid 0 also gets .nomedia */
+	if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb"))
+		&& (pi->perm == PERM_ANDROID) && (pi->userid == 0))
+		make_nomedia_in_obb = 1;
+
+	/* When creating /Android/data and /Android/obb, mark them as .nomedia */
+	if (make_nomedia_in_obb ||
+		((pi->perm == PERM_ANDROID) && (!strcasecmp(dentry->d_name.name, "data")))) {
+
+		page_buf = (char *)__get_free_page(GFP_KERNEL);
+		if (!page_buf) {
+			printk(KERN_ERR "sdcardfs: failed to allocate page buf\n");
+			goto out;	/* mkdir still returns 0: .nomedia is best effort */
+		}
+
+		nomedia_dir_name = d_absolute_path(&lower_path, page_buf, PAGE_SIZE);
+		if (IS_ERR(nomedia_dir_name)) {
+			free_page((unsigned long)page_buf);
+			printk(KERN_ERR "sdcardfs: failed to get .nomedia dir name\n");
+			goto out;
+		}
+		/* presumably the path string ends at the last byte of page_buf, making this its length */
+		fullpath_namelen = page_buf + PAGE_SIZE - nomedia_dir_name - 1;
+		fullpath_namelen += strlen("/.nomedia");
+		nomedia_fullpath = kzalloc(fullpath_namelen + 1, GFP_KERNEL);
+		if (!nomedia_fullpath) {
+			free_page((unsigned long)page_buf);
+			printk(KERN_ERR "sdcardfs: failed to allocate .nomedia fullpath buf\n");
+			goto out;
+		}
+
+		strcpy(nomedia_fullpath, nomedia_dir_name);
+		free_page((unsigned long)page_buf);	/* name has been copied out of the page */
+		strcat(nomedia_fullpath, "/.nomedia");
+		touch_err = touch(nomedia_fullpath, 0664);
+		if (touch_err) {
+			printk(KERN_ERR "sdcardfs: failed to touch(%s): %d\n",
+							nomedia_fullpath, touch_err);
+			kfree(nomedia_fullpath);
+			goto out;	/* touch_err is only logged, not returned */
+		}
+		kfree(nomedia_fullpath);
+	}
+out:	/* reached only after mnt_want_write() succeeded */
+	mnt_drop_write(lower_path.mnt);
+out_unlock:
+	unlock_dir(lower_parent_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+out_revert:
+	REVERT_CRED(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct dentry *lower_dentry;
+	struct dentry *lower_dir_dentry;
+	int err;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	/* Remove the lower directory behind @dentry; returns 0 or a negative errno. */
+
+	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it; undone by REVERT_CRED() below */
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred);
+
+	/* sdcardfs_get_real_lower(): when removing a user's obb dentry,
+	 * the dentry on the original path is the one that should be deleted. */
+	sdcardfs_get_real_lower(dentry, &lower_path);
+
+	lower_dentry = lower_path.dentry;
+	lower_dir_dentry = lock_parent(lower_dentry);	/* released by unlock_dir() */
+
+	err = mnt_want_write(lower_path.mnt);
+	if (err)
+		goto out_unlock;
+	err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+	if (err)
+		goto out;
+
+	d_drop(dentry);	/* drop our dentry on success (why not VFS's job?) */
+	if (dentry->d_inode)
+		clear_nlink(dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
+	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;	/* mirror lower parent's link count */
+
+out:	/* reached only after mnt_want_write() succeeded */
+	mnt_drop_write(lower_path.mnt);
+out_unlock:
+	unlock_dir(lower_dir_dentry);
+	sdcardfs_put_real_lower(dentry, &lower_path);
+	REVERT_CRED(saved_cred);
+out_eacces:
+	return err;
+}
+
+#if 0
+static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+			dev_t dev)
+{
+	int err = 0;
+	struct dentry *lower_dentry;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+
+	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	err = mnt_want_write(lower_path.mnt);
+	if (err)
+		goto out_unlock;
+	err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
+	if (err)
+		goto out;
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+
+out:
+	mnt_drop_write(lower_path.mnt);
+out_unlock:
+	unlock_dir(lower_parent_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	REVERT_CRED();
+	return err;
+}
+#endif
+
+/*
+ * The locking rules in sdcardfs_rename are complex.  We could use a simpler
+ * superblock-level name-space lock for renames and copy-ups.
+ */
+static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			 struct inode *new_dir, struct dentry *new_dentry)
+{
+	int err = 0;
+	struct dentry *lower_old_dentry = NULL;
+	struct dentry *lower_new_dentry = NULL;
+	struct dentry *lower_old_dir_dentry = NULL;
+	struct dentry *lower_new_dir_dentry = NULL;
+	struct dentry *trap = NULL;
+	struct dentry *new_parent = NULL;
+	struct path lower_old_path, lower_new_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(old_dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	/* Rename on the lower fs, then refresh the upper dir inodes; returns 0 or -errno. */
+	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+	if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name,
+			sbi->options.derive, 1, has_rw) ||
+		!check_caller_access_to_name(new_dir, new_dentry->d_name.name,
+			sbi->options.derive, 1, has_rw)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  new_dentry: %s, task:%s\n",
+						 __func__, new_dentry->d_name.name, current->comm);
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it; undone by REVERT_CRED() below */
+	OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred);
+
+	sdcardfs_get_real_lower(old_dentry, &lower_old_path);
+	sdcardfs_get_lower_path(new_dentry, &lower_new_path);
+	lower_old_dentry = lower_old_path.dentry;
+	lower_new_dentry = lower_new_path.dentry;
+	lower_old_dir_dentry = dget_parent(lower_old_dentry);	/* dput() at out */
+	lower_new_dir_dentry = dget_parent(lower_new_dentry);	/* dput() at out */
+
+	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	/* source should not be ancestor of target */
+	if (trap == lower_old_dentry) {
+		err = -EINVAL;
+		goto out;
+	}
+	/* target should not be ancestor of source */
+	if (trap == lower_new_dentry) {
+		err = -ENOTEMPTY;
+		goto out;
+	}
+	/* write access is taken on both lower mounts and dropped in reverse order */
+	err = mnt_want_write(lower_old_path.mnt);
+	if (err)
+		goto out;
+	err = mnt_want_write(lower_new_path.mnt);
+	if (err)
+		goto out_drop_old_write;
+
+	err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
+			 lower_new_dir_dentry->d_inode, lower_new_dentry);
+	if (err)
+		goto out_err;
+
+	/* Copy attrs from the lower dir, then fix up the derived permission */
+	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
+	fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode);
+	fix_derived_permission(new_dir);
+	if (new_dir != old_dir) {
+		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
+		fsstack_copy_inode_size(old_dir, lower_old_dir_dentry->d_inode);
+		fix_derived_permission(old_dir);
+		/* update the derived permission of the old_dentry
+		 * with its new parent
+		 */
+		new_parent = dget_parent(new_dentry);
+		if(new_parent) {
+			if(old_dentry->d_inode) {
+				get_derived_permission(new_parent, old_dentry);
+				fix_derived_permission(old_dentry->d_inode);
+			}
+			dput(new_parent);
+		}
+	}
+
+out_err:	/* both mounts hold write access here */
+	mnt_drop_write(lower_new_path.mnt);
+out_drop_old_write:
+	mnt_drop_write(lower_old_path.mnt);
+out:
+	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	dput(lower_old_dir_dentry);
+	dput(lower_new_dir_dentry);
+	sdcardfs_put_real_lower(old_dentry, &lower_old_path);
+	sdcardfs_put_lower_path(new_dentry, &lower_new_path);
+	REVERT_CRED(saved_cred);
+out_eacces:
+	return err;
+}
+
+#if 0
+static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct path lower_path;
+	/* XXX readlink does not requires overriding credential */
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	if (!lower_dentry->d_inode->i_op ||
+	    !lower_dentry->d_inode->i_op->readlink) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
+						    buf, bufsiz);
+	if (err < 0)
+		goto out;
+	fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
+
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return err;
+}
+#endif
+
+#if 0
+static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *buf;
+	int len = PAGE_SIZE, err;
+	mm_segment_t old_fs;
+
+	/* This is freed by the put_link method assuming a successful call. */
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf) {
+		buf = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	/* read the symlink, and then we will follow it */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sdcardfs_readlink(dentry, buf, len);
+	set_fs(old_fs);
+	if (err < 0) {
+		kfree(buf);
+		buf = ERR_PTR(err);
+	} else {
+		buf[err] = '\0';
+	}
+out:
+	nd_set_link(nd, buf);
+	return NULL;
+}
+#endif
+
+#if 0
+/* this @nd *IS* still used */
+static void sdcardfs_put_link(struct dentry *dentry, struct nameidata *nd,
+			    void *cookie)
+{
+	char *buf = nd_get_link(nd);
+	if (!IS_ERR(buf))	/* free the char* */
+		kfree(buf);
+}
+#endif
+
+static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags)
+{
+	int err;
+	/* Checks the sdcardfs inode only; the lower inode is not consulted here. */
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;	/* callers passing IPERM_FLAG_RCU are refused */
+
+	/*
+	 * Permission check on sdcardfs inode.
+	 * Calling process should have AID_SDCARD_RW permission
+	 */
+	err = generic_permission(inode, mask, 0, inode->i_op->check_acl);
+
+	/* XXX
+	 * The original sdcardfs code calls inode_permission(lower_inode, ...)
+	 * to check the inode permission. Doing that here seems to be
+	 * duplicated work, because the functions called after this one,
+	 * such as vfs_create, vfs_unlink, vfs_rename, etc.,
+	 * do exactly the same thing, i.e., they call inode_permission().
+	 * So we just let them do it.
+	 * If this turns out to be a security hole, re-enable the block below.
+	 */
+#if 0
+	if (!err) {
+		/*
+		 * Permission check on lower_inode(=EXT4).
+		 * we check it with AID_MEDIA_RW permission
+		 */
+		struct inode *lower_inode;
+		OVERRIDE_CRED(SDCARDFS_SB(inode->sb));
+
+		lower_inode = sdcardfs_lower_inode(inode);
+		err = inode_permission(lower_inode, mask);
+
+		REVERT_CRED();
+	}
+#endif
+	return err;
+
+}
+
+static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct dentry *lower_dentry;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct path lower_path;
+	struct dentry *parent;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	/* Refresh the upper inode from the lower one and fill @stat; 0 or -EACCES. */
+	parent = dget_parent(dentry);
+	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
+						sbi->options.derive, 0, 0)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		dput(parent);
+		return -EACCES;
+	}
+	dput(parent);
+
+	inode = dentry->d_inode;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;	/* assigned but not used afterwards */
+	lower_inode = sdcardfs_lower_inode(inode);
+
+	fsstack_copy_attr_all(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
+	/* the dentry may have been moved from another location,
+	 * so at this stage its derived permission must be
+	 * rechecked from its private field.
+	 */
+	fix_derived_permission(inode);
+
+	generic_fillattr(inode, stat);	/* @stat is filled from the upper inode */
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return 0;
+}
+
+static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int err = 0;
+	struct dentry *lower_dentry;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct path lower_path;
+	struct iattr lower_ia;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct dentry *parent;
+	int has_rw;
+	/* Apply @ia to the lower inode (minus uid/gid/mode) and resync the upper inode. */
+	inode = dentry->d_inode;
+
+	/*
+	 * Check if user has permission to change inode.  We don't check if
+	 * this user can change the lower inode: that should happen when
+	 * calling notify_change on the lower inode.
+	 */
+	err = inode_change_ok(inode, ia);
+
+	/* no vfs_XXX operations required, cred overriding will be skipped. wj*/
+	if (!err) {
+		/* check the Android group ID */
+		has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
+		parent = dget_parent(dentry);
+		if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
+						sbi->options.derive, 1, has_rw)) {
+			printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+							 "  dentry: %s, task:%s\n",
+							 __func__, dentry->d_name.name, current->comm);
+			err = -EACCES;
+		}
+		dput(parent);
+	}
+
+	if (err)
+		goto out_err;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_inode = sdcardfs_lower_inode(inode);
+
+	/* prepare our own lower struct iattr (with the lower file) */
+	memcpy(&lower_ia, ia, sizeof(lower_ia));
+	if (ia->ia_valid & ATTR_FILE)
+		lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file);
+	/* owner, group and mode changes are never forwarded to the lower inode */
+	lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+
+	/*
+	 * If shrinking, first truncate upper level to cancel writing dirty
+	 * pages beyond the new eof; and also if its maxbytes is more
+	 * limiting (fail with -EFBIG before making any change to the lower
+	 * level).  There is no need to vmtruncate the upper level
+	 * afterwards in the other cases: we fsstack_copy_inode_size from
+	 * the lower level.
+	 */
+	if (current->mm)	/* NOTE(review): why mmap_sem is held is not visible here */
+		down_write(&current->mm->mmap_sem);
+	if (ia->ia_valid & ATTR_SIZE) {
+		err = inode_newsize_ok(inode, ia->ia_size);
+		if (err) {
+			if (current->mm)
+				up_write(&current->mm->mmap_sem);
+			goto out;
+		}
+		truncate_setsize(inode, ia->ia_size);
+	}
+
+	/*
+	 * mode change is for clearing setuid/setgid bits. Allow lower fs
+	 * to interpret this in its own way.
+	 * (ATTR_MODE was already cleared above, so this changes nothing.)
+	 */
+	if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		lower_ia.ia_valid &= ~ATTR_MODE;
+
+	/* notify the (possibly copied-up) lower inode */
+	/*
+	 * Note: we use lower_dentry->d_inode, because lower_inode may be
+	 * unlinked (no inode->i_sb and i_ino==0).  This happens if someone
+	 * tries to open(), unlink(), then ftruncate() a file.
+	 */
+	mutex_lock(&lower_dentry->d_inode->i_mutex);
+	err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */
+	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	if (current->mm)
+		up_write(&current->mm->mmap_sem);
+	if (err)
+		goto out;
+
+	/* get attributes from the lower inode */
+	fsstack_copy_attr_all(inode, lower_inode);
+	/* update derived permission of the upper inode */
+	fix_derived_permission(inode);
+
+	/*
+	 * Not running fsstack_copy_inode_size(inode, lower_inode), because
+	 * VFS should update our inode size, and notify_change on
+	 * lower_inode should update its size.
+	 */
+
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+out_err:	/* nothing was acquired yet on this path */
+	return err;
+}
+
+const struct inode_operations sdcardfs_symlink_iops = {
+	.permission	= sdcardfs_permission,
+	.setattr	= sdcardfs_setattr,
+	/* XXX Following operations are implemented,
+	 *     but FUSE(sdcard) or FAT does not support them
+	 *     These methods are *NOT* perfectly tested.
+	.readlink	= sdcardfs_readlink,
+	.follow_link	= sdcardfs_follow_link,
+	.put_link	= sdcardfs_put_link,
+	 */
+};
+
+const struct inode_operations sdcardfs_dir_iops = {
+	.create		= sdcardfs_create,
+	.lookup		= sdcardfs_lookup,
+	.permission	= sdcardfs_permission,
+	.unlink		= sdcardfs_unlink,
+	.mkdir		= sdcardfs_mkdir,
+	.rmdir		= sdcardfs_rmdir,
+	.rename		= sdcardfs_rename,
+	.setattr	= sdcardfs_setattr,
+	.getattr	= sdcardfs_getattr,
+	/* XXX Following operations are implemented,
+	 *     but FUSE(sdcard) or FAT does not support them
+	 *     These methods are *NOT* perfectly tested.
+	.symlink	= sdcardfs_symlink,
+	.link		= sdcardfs_link,
+	.mknod		= sdcardfs_mknod,
+	 */
+};
+
+const struct inode_operations sdcardfs_main_iops = {
+	.permission	= sdcardfs_permission,
+	.setattr	= sdcardfs_setattr,
+	.getattr	= sdcardfs_getattr,
+};
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
new file mode 100644
index 000000000000..c0b12375b1bf
--- /dev/null
+++ b/fs/sdcardfs/lookup.c
@@ -0,0 +1,386 @@
+/*
+ * fs/sdcardfs/lookup.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/delay.h"
+
+/* The dentry cache is just so we have properly sized dentries */
+static struct kmem_cache *sdcardfs_dentry_cachep;
+
+int sdcardfs_init_dentry_cache(void)
+{
+	/* slab cache sized for one struct sdcardfs_dentry_info per object */
+	sdcardfs_dentry_cachep = kmem_cache_create("sdcardfs_dentry",
+			sizeof(struct sdcardfs_dentry_info), 0, SLAB_RECLAIM_ACCOUNT, NULL);
+	if (!sdcardfs_dentry_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+void sdcardfs_destroy_dentry_cache(void)
+{
+	if (sdcardfs_dentry_cachep != NULL)	/* skip when no cache was created */
+		kmem_cache_destroy(sdcardfs_dentry_cachep);
+}
+
+void free_dentry_private_data(struct dentry *dentry)
+{
+	if (dentry && dentry->d_fsdata) {	/* otherwise nothing to release */
+		kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata);
+		dentry->d_fsdata = NULL;	/* a repeated call becomes a no-op */
+	}
+}
+
+/* Allocate zeroed private data for @dentry; returns 0 or -ENOMEM. */
+int new_dentry_private_data(struct dentry *dentry)
+{
+	struct sdcardfs_dentry_info *info;
+
+	/* use zalloc to init dentry_info.lower_path */
+	info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+
+	spin_lock_init(&info->lock);
+	dentry->d_fsdata = info;
+
+	return 0;
+}
+
+static int sdcardfs_inode_test(struct inode *inode, void *candidate_lower_inode)
+{
+	/*
+	 * Match callback handed to iget5_locked(): report 1 when @inode
+	 * already wraps the candidate lower inode, 0 otherwise.
+	 */
+	return sdcardfs_lower_inode(inode) == (struct inode *)candidate_lower_inode;
+}
+
+static int sdcardfs_inode_set(struct inode *inode, void *lower_inode)
+{
+	/* nothing to set here: sdcardfs_iget() does the actual inode initialization */
+	return 0;
+}
+
+static struct inode *sdcardfs_iget(struct super_block *sb,
+				 struct inode *lower_inode)
+{
+	struct sdcardfs_inode_info *info;
+	struct inode *inode; /* the new inode to return */
+	int err;
+	/* Find or create the sdcardfs inode wrapping @lower_inode; ERR_PTR on failure. */
+	inode = iget5_locked(sb, /* our superblock */
+			     /*
+			      * hashval: we use inode number, but we can
+			      * also use "(unsigned long)lower_inode"
+			      * instead.
+			      */
+			     lower_inode->i_ino, /* hashval */
+			     sdcardfs_inode_test,	/* inode comparison function */
+			     sdcardfs_inode_set, /* inode init function */
+			     lower_inode); /* data passed to test+set fxns */
+	if (!inode) {
+		/* no reference on lower_inode has been taken yet: nothing to iput */
+		err = -EACCES;
+		return ERR_PTR(err);
+	}
+	/* if found a cached inode, then just return it */
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	/* initialize new inode */
+	info = SDCARDFS_I(inode);
+
+	inode->i_ino = lower_inode->i_ino;
+	if (!igrab(lower_inode)) {
+		iget_failed(inode);	/* unlock and release the half-built I_NEW inode */
+		return ERR_PTR(-ESTALE);
+	}
+	sdcardfs_set_lower_inode(inode, lower_inode);
+
+	inode->i_version++;
+
+	/* use different set of inode ops for symlinks & directories */
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_op = &sdcardfs_dir_iops;
+	else if (S_ISLNK(lower_inode->i_mode))
+		inode->i_op = &sdcardfs_symlink_iops;
+	else
+		inode->i_op = &sdcardfs_main_iops;
+
+	/* use different set of file ops for directories */
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_fop = &sdcardfs_dir_fops;
+	else
+		inode->i_fop = &sdcardfs_main_fops;
+
+	inode->i_mapping->a_ops = &sdcardfs_aops;
+
+	inode->i_atime.tv_sec = 0;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_sec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_sec = 0;
+	inode->i_ctime.tv_nsec = 0;
+
+	/* properly initialize special inodes */
+	if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
+	    S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
+		init_special_inode(inode, lower_inode->i_mode,
+				   lower_inode->i_rdev);
+
+	/* all well, copy inode attributes */
+	fsstack_copy_attr_all(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
+
+	fix_derived_permission(inode);
+
+	unlock_new_inode(inode);	/* clears I_NEW; the inode is now usable */
+	return inode;
+}
+
+/*
+ * Connect an sdcardfs dentry with an sdcardfs inode that wraps the lower
+ * inode of @lower_path ("vnode interposition").  Returns 0 or -errno.
+ *
+ * @dentry: sdcardfs's dentry which interposes on lower one
+ * @sb: sdcardfs's super_block
+ * @lower_path: the lower path (caller does path_get/put)
+ */
+int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+		     struct path *lower_path)
+{
+	int err = 0;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct super_block *lower_sb;
+
+	lower_inode = lower_path->dentry->d_inode;
+	lower_sb = sdcardfs_lower_super(sb);
+
+	/* check that the lower file system didn't cross a mount point */
+	if (lower_inode->i_sb != lower_sb) {
+		err = -EXDEV;
+		goto out;
+	}
+
+	/*
+	 * We allocate our new inode below by calling sdcardfs_iget,
+	 * which will initialize some of the new inode's fields
+	 */
+
+	/* inherit lower inode number for sdcardfs's inode */
+	inode = sdcardfs_iget(sb, lower_inode);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+
+	d_add(dentry, inode);	/* attach the inode to @dentry */
+	update_derived_permission(dentry);
+out:
+	return err;
+}
+
+/*
+ * Main driver function for sdcardfs's lookup.
+ *
+ * Returns: ERR_PTR(err), which is NULL when err is 0 (ok).
+ * @lower_parent_path supplies the <dentry,mnt> of the lower parent directory.
+ */
+static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
+		struct nameidata *nd, struct path *lower_parent_path)
+{
+	int err = 0;
+	struct vfsmount *lower_dir_mnt;
+	struct dentry *lower_dir_dentry = NULL;
+	struct dentry *lower_dentry;
+	const char *name;
+	struct nameidata lower_nd;
+	struct path lower_path;
+	struct qstr this;
+	struct sdcardfs_sb_info *sbi;
+
+	sbi = SDCARDFS_SB(dentry->d_sb);
+	/* must initialize dentry operations */
+	d_set_d_op(dentry, &sdcardfs_ci_dops);
+
+	if (IS_ROOT(dentry))
+		goto out;	/* err is still 0, so NULL is returned */
+
+	name = dentry->d_name.name;
+
+	/* now start the actual lookup procedure */
+	lower_dir_dentry = lower_parent_path->dentry;
+	lower_dir_mnt = lower_parent_path->mnt;
+
+	/* Use vfs_path_lookup to check if the dentry exists or not */
+	if (sbi->options.lower_fs == LOWER_FS_EXT4) {
+		err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name,
+				LOOKUP_CASE_INSENSITIVE, &lower_nd);
+	} else if (sbi->options.lower_fs == LOWER_FS_FAT) {
+		err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0,
+				&lower_nd);
+	}
+	/* NOTE(review): any other lower_fs leaves err 0 and lower_nd uninitialized - confirm it cannot occur */
+	/* no error: handle positive dentries */
+	if (!err) {
+		/* check if the dentry is an obb dentry
+		 * if true, the lower_inode must be replaced with
+		 * the inode of the graft path */
+
+		if(need_graft_path(dentry)) {
+
+			/* setup_obb_dentry()
+			 * The lower_path will be stored to the dentry's orig_path
+			 * and the base obbpath will be copied to the lower_path variable.
+			 * if an error is returned, there's no change in the lower_path
+			 *		returns: -ERRNO if error (0: no error) */
+			err = setup_obb_dentry(dentry, &lower_nd.path);
+
+			if(err) {
+				/* if the sbi->obbpath is not available, we can optionally
+				 * setup the lower_path with its orig_path.
+				 * but, the current implementation just returns an error
+				 * because the sdcard daemon also regards this case as
+				 * a lookup fail. */
+				printk(KERN_INFO "sdcardfs: base obbpath is not available\n");
+				sdcardfs_put_reset_orig_path(dentry);
+				goto out;
+			}
+		}
+
+		sdcardfs_set_lower_path(dentry, &lower_nd.path);
+		err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_nd.path);
+		if (err) /* path_put underlying path on error */
+			sdcardfs_put_reset_lower_path(dentry);
+		goto out;
+	}
+
+	/*
+	 * We don't consider ENOENT an error, and we want to return a
+	 * negative dentry.
+	 */
+	if (err && err != -ENOENT)
+		goto out;
+
+	/* instantiate a new negative dentry */
+	this.name = name;
+	this.len = strlen(name);
+	this.hash = full_name_hash(this.name, this.len);
+	lower_dentry = d_lookup(lower_dir_dentry, &this);
+	if (lower_dentry)
+		goto setup_lower;	/* reuse the lower dentry found by d_lookup() */
+
+	lower_dentry = d_alloc(lower_dir_dentry, &this);
+	if (!lower_dentry) {
+		err = -ENOMEM;
+		goto out;
+	}
+	d_add(lower_dentry, NULL); /* instantiate and hash */
+
+setup_lower:
+	lower_path.dentry = lower_dentry;
+	lower_path.mnt = mntget(lower_dir_mnt);
+	sdcardfs_set_lower_path(dentry, &lower_path);
+
+	/*
+	 * If the intent is to create a file, then don't return an error, so
+	 * the VFS will continue the process of making this negative dentry
+	 * into a positive one.
+	 */
+	if (nd) {
+		if (nd->flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))
+			err = 0;	/* otherwise err stays -ENOENT and is returned */
+	} else
+		err = 0;
+
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * On success:
+ *	fills the dentry object with appropriate values and returns NULL.
+ * On failure:
+ *	returns an error pointer.
+ *
+ * @dir : Parent inode. It is locked (dir->i_mutex)
+ * @dentry : Target dentry to look up; its fields are set up here.
+ *	     (dentry->d_name is initialized already)
+ * @nd : lookup nameidata, passed on to __sdcardfs_lookup()
+ */
+struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+			     struct nameidata *nd)
+{
+	struct dentry *ret = NULL, *parent;
+	struct path lower_parent_path;
+	int err = 0;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+
+	parent = dget_parent(dentry);	/* dropped by dput() at out_err */
+
+	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
+						sbi->options.derive, 0, 0)) {
+		ret = ERR_PTR(-EACCES);
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+                         "	dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		goto out_err;
+        }
+
+	/* save current_cred and override it */
+	/* NOTE(review): if this macro can return on failure, @parent would leak - confirm */
+	OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred);
+	sdcardfs_get_lower_path(parent, &lower_parent_path);
+
+	/* allocate dentry private data.  We free it in ->d_release */
+	err = new_dentry_private_data(dentry);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out;
+	}
+
+	ret = __sdcardfs_lookup(dentry, nd, &lower_parent_path);
+	if (IS_ERR(ret))
+	{
+		goto out;
+	}
+	if (ret)
+		dentry = ret;
+	if (dentry->d_inode) {
+		fsstack_copy_attr_times(dentry->d_inode,
+					sdcardfs_lower_inode(dentry->d_inode));
+		/* get derived permission */
+		get_derived_permission(parent, dentry);
+		fix_derived_permission(dentry->d_inode);
+	}
+	/* update parent directory's atime */
+	fsstack_copy_attr_atime(parent->d_inode,
+				sdcardfs_lower_inode(parent->d_inode));
+
+out:
+	sdcardfs_put_lower_path(parent, &lower_parent_path);
+	REVERT_CRED(saved_cred);
+out_err:
+	dput(parent);
+	return ret;
+}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
new file mode 100644
index 000000000000..1fdceffec72c
--- /dev/null
+++ b/fs/sdcardfs/main.c
@@ -0,0 +1,425 @@
+/*
+ * fs/sdcardfs/main.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/parser.h>
+
+enum {
+	Opt_uid,
+	Opt_gid,
+	Opt_wgid,
+	Opt_debug,
+	Opt_split,
+	Opt_derive,
+	Opt_lower_fs,
+	Opt_reserved_mb,
+	Opt_err,
+};
+
+static const match_table_t sdcardfs_tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_wgid, "wgid=%u"},
+	{Opt_debug, "debug"},
+	{Opt_split, "split"},
+	{Opt_derive, "derive=%s"},
+	{Opt_lower_fs, "lower_fs=%s"},
+	{Opt_reserved_mb, "reserved_mb=%u"},
+	{Opt_err, NULL}
+};
+
+/* Parse the comma-separated mount options in @options into @opts and @debug.
+ * Returns 0 on success, -EINVAL on a bad option, -ENOMEM on allocation failure. */
+static int parse_options(struct super_block *sb, char *options, int silent,
+				int *debug, struct sdcardfs_mount_options *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	char *string_option;
+
+	/* by default, we use AID_MEDIA_RW as uid, gid */
+	opts->fs_low_uid = AID_MEDIA_RW;
+	opts->fs_low_gid = AID_MEDIA_RW;
+	/* by default, we use AID_SDCARD_RW as write_gid */
+	opts->write_gid = AID_SDCARD_RW;
+	/* default permission policy
+	 * (DERIVE_NONE | DERIVE_LEGACY | DERIVE_UNIFIED) */
+	opts->derive = DERIVE_NONE;
+	opts->split_perms = 0;
+	/* by default, we use LOWER_FS_EXT4 as lower fs type */
+	opts->lower_fs = LOWER_FS_EXT4;
+	/* by default, 0MB is reserved */
+	opts->reserved_mb = 0;
+
+	*debug = 0;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		if (!*p)
+			continue;
+
+		switch (match_token(p, sdcardfs_tokens, args)) {
+		case Opt_debug:
+			*debug = 1;
+			break;
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				goto invalid_option;
+			opts->fs_low_uid = option;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				goto invalid_option;
+			opts->fs_low_gid = option;
+			break;
+		case Opt_wgid:
+			if (match_int(&args[0], &option))
+				goto invalid_option;
+			opts->write_gid = option;
+			break;
+		case Opt_split:
+			opts->split_perms = 1;
+			break;
+		case Opt_derive:
+			string_option = match_strdup(&args[0]);
+			if (!string_option)
+				return -ENOMEM;
+			if (!strcmp("none", string_option)) {
+				opts->derive = DERIVE_NONE;
+			} else if (!strcmp("legacy", string_option)) {
+				opts->derive = DERIVE_LEGACY;
+			} else if (!strcmp("unified", string_option)) {
+				opts->derive = DERIVE_UNIFIED;
+			} else {
+				kfree(string_option);
+				goto invalid_option;
+			}
+			kfree(string_option);
+			break;
+		case Opt_lower_fs:
+			string_option = match_strdup(&args[0]);
+			if (!string_option)
+				return -ENOMEM;
+			if (!strcmp("ext4", string_option)) {
+				opts->lower_fs = LOWER_FS_EXT4;
+			} else if (!strcmp("fat", string_option)) {
+				opts->lower_fs = LOWER_FS_FAT;
+			} else {
+				kfree(string_option);
+				goto invalid_option;
+			}
+			kfree(string_option);
+			break;
+		case Opt_reserved_mb:
+			if (match_int(&args[0], &option))
+				goto invalid_option;
+			opts->reserved_mb = option;
+			break;
+		/* unknown option, or a value that could not be parsed */
+		default:
+invalid_option:
+			if (!silent)
+				printk(KERN_ERR "Unrecognized mount option \"%s\" "
+						"or missing value\n", p);
+			return -EINVAL;
+		}
+	}
+
+	if (*debug) {
+		printk(KERN_INFO "sdcardfs : options - debug:%d\n", *debug);
+		printk(KERN_INFO "sdcardfs : options - uid:%d\n", opts->fs_low_uid);
+		printk(KERN_INFO "sdcardfs : options - gid:%d\n", opts->fs_low_gid);
+	}
+
+	return 0;
+}
+
+/*
+ * our custom d_alloc_root work-alike
+ *
+ * we can't use d_alloc_root if we want to use our own interpose function
+ * unchanged, so we simply call our own "fake" d_alloc_root
+ */
+static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
+{
+	struct dentry *ret = NULL;
+
+	if (sb) {
+		static const struct qstr name = {
+			.name = "/",
+			.len = 1
+		};
+
+		ret = d_alloc(NULL, &name);
+		if (ret) {
+			d_set_d_op(ret, &sdcardfs_ci_dops);
+			ret->d_sb = sb;
+			ret->d_parent = ret;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Fill in @sb for a mount stacked on lower directory @dev_name; returns 0 or
+ * a negative errno.  No sdcardfs_super_info rwsem locking is needed: nobody
+ * else can have a reference to the superblock at this point in time.
+ */
+static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
+						void *raw_data, int silent)
+{
+	int err = 0, debug;
+	struct super_block *lower_sb;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sb_info;
+	void *pkgl_id;
+
+	printk(KERN_INFO "sdcardfs version 2.0\n");
+
+	if (!dev_name) {
+		printk(KERN_ERR "sdcardfs: read_super: missing dev_name argument\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name);
+	printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data);
+
+	/* parse lower path */
+	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+			&lower_path);
+	if (err) {
+		printk(KERN_ERR	"sdcardfs: error accessing "
+		       "lower directory '%s'\n", dev_name);
+		goto out;
+	}
+
+	/* allocate superblock private data */
+	sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL);
+	if (!SDCARDFS_SB(sb)) {
+		printk(KERN_CRIT "sdcardfs: read_super: out of memory\n");
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	sb_info = sb->s_fs_info;
+
+	/* parse options */
+	err = parse_options(sb, raw_data, silent, &debug, &sb_info->options);
+	if (err) {
+		printk(KERN_ERR	"sdcardfs: invalid options\n");
+		goto out_freesbi;
+	}
+
+	if (sb_info->options.derive != DERIVE_NONE) {
+		pkgl_id = packagelist_create(sb_info->options.write_gid);
+		if (IS_ERR(pkgl_id)) {
+			err = PTR_ERR(pkgl_id);	/* do not report success */
+			goto out_freesbi;
+		}
+		sb_info->pkgl_id = pkgl_id;
+	}
+
+	/* set the lower superblock field of upper superblock */
+	lower_sb = lower_path.dentry->d_sb;
+	atomic_inc(&lower_sb->s_active);
+	sdcardfs_set_lower_super(sb, lower_sb);
+
+	/* inherit maxbytes from lower file system */
+	sb->s_maxbytes = lower_sb->s_maxbytes;
+
+	/*
+	 * Our c/m/atime granularity is 1 ns because we may stack on file
+	 * systems whose granularity is as good.
+	 */
+	sb->s_time_gran = 1;
+
+	sb->s_magic = SDCARDFS_SUPER_MAGIC;
+	sb->s_op = &sdcardfs_sops;
+
+	/* see comment next to the definition of sdcardfs_d_alloc_root */
+	sb->s_root = sdcardfs_d_alloc_root(sb);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto out_sput;
+	}
+
+	/* link the upper and lower dentries */
+	sb->s_root->d_fsdata = NULL;
+	err = new_dentry_private_data(sb->s_root);
+	if (err)
+		goto out_freeroot;
+
+	/* set the lower dentries for s_root */
+	sdcardfs_set_lower_path(sb->s_root, &lower_path);
+
+	/* call interpose to create the upper level inode */
+	err = sdcardfs_interpose(sb->s_root, sb, &lower_path);
+	if (!err) {
+		/* setup permission policy */
+		switch(sb_info->options.derive) {
+			case DERIVE_NONE:
+				setup_derived_state(sb->s_root->d_inode,
+					PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775);
+				sb_info->obbpath_s = NULL;
+				break;
+			case DERIVE_LEGACY:
+				/* Legacy behavior used to support internal multiuser layout which
+				 * places user_id at the top directory level, with the actual roots
+				 * just below that. Shared OBB path is also at top level. */
+				setup_derived_state(sb->s_root->d_inode,
+					PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
+				/* initialize the obbpath string and lookup the path
+				 * sb_info->obb_path will be deactivated by path_put
+				 * on sdcardfs_put_super (NOTE(review): kzalloc() unchecked) */
+				sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+				snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
+				err =  prepare_dir(sb_info->obbpath_s,
+							sb_info->options.fs_low_uid,
+							sb_info->options.fs_low_gid, 00755);
+				if(err)
+					printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n",
+							__func__,__LINE__, sb_info->obbpath_s);
+				break;
+			case DERIVE_UNIFIED:
+				/* Unified multiuser layout which places secondary user_id under
+				 * /Android/user and shared OBB path under /Android/obb. */
+				setup_derived_state(sb->s_root->d_inode,
+						PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
+				/* NOTE(review): the kzalloc() result below is not checked */
+				sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+				snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
+				break;
+		}
+		fix_derived_permission(sb->s_root->d_inode);
+
+		if (!silent)
+			printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n",
+						dev_name, lower_sb->s_type->name);
+		goto out;
+	}
+	/* else error: fall through */
+	free_dentry_private_data(sb->s_root);
+out_freeroot:
+	dput(sb->s_root);
+	sb->s_root = NULL;	/* keep kill_sb from touching the released root */
+out_sput:
+	/* drop refs we took earlier; pkgl_id is only set when derive != none */
+	atomic_dec(&lower_sb->s_active);
+	if (sb_info->pkgl_id)
+		packagelist_destroy(sb_info->pkgl_id);
+out_freesbi:
+	kfree(SDCARDFS_SB(sb));
+	sb->s_fs_info = NULL;
+out_free:
+	path_put(&lower_path);
+out:
+	return err;
+}
+
+/* A feature which supports mount_nodev() with options */
+static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type,
+        int flags, const char *dev_name, void *data,
+        int (*fill_super)(struct super_block *, const char *, void *, int))
+
+{
+	int error;
+	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
+
+	if (IS_ERR(s))
+		return ERR_CAST(s);
+
+	s->s_flags = flags;
+
+	error = fill_super(s, dev_name, data, flags & MS_SILENT ? 1 : 0);
+	if (error) {
+		deactivate_locked_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return dget(s->s_root);
+}
+
+struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data)
+{
+	/*
+	 * dev_name is a lower_path_name,
+	 * raw_data is a option string.
+	 */
+	return mount_nodev_with_options(fs_type, flags, dev_name,
+					raw_data, sdcardfs_read_super);
+}
+
+static struct file_system_type sdcardfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= SDCARDFS_NAME,
+	.mount		= sdcardfs_mount,
+	.kill_sb	= generic_shutdown_super,
+	.fs_flags	= FS_REVAL_DOT,
+};
+
+static int __init init_sdcardfs_fs(void)
+{
+	int err;
+
+	pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n");
+
+	err = sdcardfs_init_inode_cache();
+	if (err)
+		goto out;
+	err = sdcardfs_init_dentry_cache();
+	if (err)
+		goto out;
+	err = packagelist_init();
+	if (err)
+		goto out;
+	err = register_filesystem(&sdcardfs_fs_type);
+out:
+	if (err) {
+		sdcardfs_destroy_inode_cache();
+		sdcardfs_destroy_dentry_cache();
+		packagelist_exit();
+	}
+	return err;
+}
+
+static void __exit exit_sdcardfs_fs(void)
+{
+	sdcardfs_destroy_inode_cache();
+	sdcardfs_destroy_dentry_cache();
+	packagelist_exit();
+	unregister_filesystem(&sdcardfs_fs_type);
+	pr_info("Completed sdcardfs module unload\n");
+}
+
+MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
+	      " (http://www.fsl.cs.sunysb.edu/)");
+MODULE_DESCRIPTION("Wrapfs " SDCARDFS_VERSION
+		   " (http://wrapfs.filesystems.org/)");
+MODULE_LICENSE("GPL");
+
+module_init(init_sdcardfs_fs);
+module_exit(exit_sdcardfs_fs);
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
new file mode 100644
index 000000000000..c807d7f18f8b
--- /dev/null
+++ b/fs/sdcardfs/mmap.c
@@ -0,0 +1,82 @@
+/*
+ * fs/sdcardfs/mmap.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	int err;
+	struct file *file, *lower_file;
+	const struct vm_operations_struct *lower_vm_ops;
+	struct vm_area_struct lower_vma;
+
+	memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
+	file = lower_vma.vm_file;
+	lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+	BUG_ON(!lower_vm_ops);
+
+	lower_file = sdcardfs_lower_file(file);
+	/*
+	 * XXX: vm_ops->fault may be called in parallel.  Because we have to
+	 * resort to temporarily changing the vma->vm_file to point to the
+	 * lower file, a concurrent invocation of sdcardfs_fault could see a
+	 * different value.  In this workaround, we keep a different copy of
+	 * the vma structure in our stack, so we never expose a different
+	 * value of the vma->vm_file called to us, even temporarily.  A
+	 * better fix would be to change the calling semantics of ->fault to
+	 * take an explicit file pointer.
+	 */
+	lower_vma.vm_file = lower_file;
+	err = lower_vm_ops->fault(&lower_vma, vmf);
+	return err;
+}
+
+static ssize_t sdcardfs_direct_IO(int rw, struct kiocb *iocb,
+			      const struct iovec *iov, loff_t offset,
+			      unsigned long nr_segs)
+{
+	/*
+	 * This function returns zero on purpose in order to support direct IO.
+	 * __dentry_open checks a_ops->direct_IO and returns EINVAL if it is null.
+	 *
+	 * However, this function won't be called by certain file operations
+	 * including generic fs functions.  Reads and writes are delivered to
+	 * the lower file systems and the direct IOs will be handled by them.
+	 *
+	 * NOTE: exceptionally, on the recent kernels (since Linux 3.8.x),
+	 * swap_writepage invokes this function directly.
+	 */
+	printk(KERN_INFO "%s, operation is not supported\n", __func__);
+	return 0;
+}
+
+/*
+ * XXX: the default address_space_ops for sdcardfs is empty.  We cannot set
+ * our inode->i_mapping->a_ops to NULL because too many code paths expect
+ * the a_ops vector to be non-NULL.
+ */
+const struct address_space_operations sdcardfs_aops = {
+	/* empty on purpose */
+	.direct_IO	= sdcardfs_direct_IO,
+};
+
+const struct vm_operations_struct sdcardfs_vm_ops = {
+	.fault		= sdcardfs_fault,
+};
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h
new file mode 100644
index 000000000000..923ba101dfa9
--- /dev/null
+++ b/fs/sdcardfs/multiuser.h
@@ -0,0 +1,37 @@
+/*
+ * fs/sdcardfs/multiuser.h
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#define MULTIUSER_APP_PER_USER_RANGE 100000
+
+typedef uid_t userid_t;
+typedef uid_t appid_t;
+
+static inline userid_t multiuser_get_user_id(uid_t uid) {
+    return uid / MULTIUSER_APP_PER_USER_RANGE;
+}
+
+static inline appid_t multiuser_get_app_id(uid_t uid) {
+    return uid % MULTIUSER_APP_PER_USER_RANGE;
+}
+
+static inline uid_t multiuser_get_uid(userid_t userId, appid_t appId) {
+    return userId * MULTIUSER_APP_PER_USER_RANGE + (appId % MULTIUSER_APP_PER_USER_RANGE);
+}
+
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
new file mode 100644
index 000000000000..c786d8f92203
--- /dev/null
+++ b/fs/sdcardfs/packagelist.c
@@ -0,0 +1,458 @@
+/*
+ * fs/sdcardfs/packagelist.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "strtok.h"
+#include "hashtable.h"
+#include <linux/syscalls.h>
+#include <linux/kthread.h>
+#include <linux/inotify.h>
+#include <linux/delay.h>
+
+#define STRING_BUF_SIZE		(512)
+
+struct hashtable_entry {
+        struct hlist_node hlist;
+        void *key;
+	int value;
+};
+
+struct packagelist_data {
+	DECLARE_HASHTABLE(package_to_appid,8);
+	DECLARE_HASHTABLE(appid_with_rw,7);
+	struct mutex hashtable_lock;
+	struct task_struct *thread_id;
+	gid_t write_gid;
+	char *strtok_last;
+	char read_buf[STRING_BUF_SIZE];
+	char event_buf[STRING_BUF_SIZE];
+	char app_name_buf[STRING_BUF_SIZE];
+	char gids_buf[STRING_BUF_SIZE];
+};
+
+static struct kmem_cache *hashtable_entry_cachep;
+
+/* Path to system-provided mapping of package name to appIds */
+static const char* const kpackageslist_file = "/data/system/packages.list";
+/* Supplementary groups to execute with */
+static const gid_t kgroups[1] = { AID_PACKAGE_INFO };
+
+/* x31 multiplicative hash of the NUL-terminated string @key, seeded with its length */
+static unsigned int str_hash(void *key) {
+	const char *data = key;
+	size_t len = strlen(data);	/* measured once, not on every iteration */
+	unsigned int h = len;
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		h = h * 31 + data[i];
+	return h;
+}
+
+static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) {
+        struct hashtable_entry *hash_cur;
+	struct hlist_node *h_n;
+
+        hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist, (unsigned int)appid, h_n)
+                if (appid == hash_cur->key)
+                        return 1;
+	return 0;
+}
+
+/* Return if the calling UID holds sdcard_rw. */
+int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) {
+	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
+	appid_t appid;
+	int ret;
+
+	/* No additional permissions enforcement */
+	if (derive == DERIVE_NONE) {
+		return 1;
+	}
+
+	appid = multiuser_get_app_id(current_fsuid());
+	mutex_lock(&pkgl_dat->hashtable_lock);
+	ret = contain_appid_key(pkgl_dat, (void *)appid);
+	mutex_unlock(&pkgl_dat->hashtable_lock);
+	return ret;
+}
+
+appid_t get_appid(void *pkgl_id, const char *app_name)
+{
+	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_n;
+	unsigned int hash = str_hash((void *)app_name);
+	appid_t ret_id;
+
+	/* look up app_name in package_to_appid while holding hashtable_lock */
+	mutex_lock(&pkgl_dat->hashtable_lock);
+	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) {
+		/* keys are compared case-insensitively; the bucket comes from str_hash() */
+		if (!strcasecmp(app_name, hash_cur->key)) {
+			ret_id = (appid_t)hash_cur->value;
+			mutex_unlock(&pkgl_dat->hashtable_lock);
+			/* found: return the value stored for this package name */
+			return ret_id;
+		}
+	}
+	mutex_unlock(&pkgl_dat->hashtable_lock);
+	/* no entry matched app_name: 0 is returned */
+	return 0;
+}
+
+/* Kernel has already enforced everything we returned through
+ * derive_permissions_locked(), so this is used to lock down access
+ * even further, such as enforcing that apps hold sdcard_rw. */
+int check_caller_access_to_name(struct inode *parent_node, const char* name,
+					derive_t derive, int w_ok, int has_rw) {
+
+	/* Always block security-sensitive files at root */
+	if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) {
+		if (!strcasecmp(name, "autorun.inf")
+			|| !strcasecmp(name, ".android_secure")
+			|| !strcasecmp(name, "android_secure")) {
+			return 0;
+		}
+	}
+
+	/* No additional permissions enforcement */
+	if (derive == DERIVE_NONE) {
+		return 1;
+	}
+
+	/* Root always has access; access for any other UIDs should always
+	 * be controlled through packages.list. */
+	if (current_fsuid() == 0) {
+		return 1;
+	}
+
+	/* If asking to write, verify that caller either owns the
+	 * parent or holds sdcard_rw. */
+	if (w_ok) {
+		if (parent_node &&
+			(current_fsuid() == SDCARDFS_I(parent_node)->d_uid)) {
+			return 1;
+		}
+		return has_rw;
+	}
+
+	/* No extra permissions to enforce */
+	return 1;
+}
+
+/* Used when opening a file: returns 0 if open_flags ask for O_RDONLY, else 1.
+ * The open flags must be checked before calling check_caller_access_to_name() */
+int open_flags_to_access_mode(int open_flags) {
+	if((open_flags & O_ACCMODE) == O_RDONLY) {
+		return 0; /* R_OK */
+	} else if ((open_flags & O_ACCMODE) == O_WRONLY) {
+		return 1; /* W_OK */
+	} else {
+		/* Probably O_RDWR, but treat any other mode as writable to be safe */
+		return 1; /* R_OK | W_OK */
+	}
+}
+
+static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) {
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	struct hlist_node *h_n;
+	unsigned int hash = str_hash(key);
+
+	/* update key's entry in place, else add one; NOTE(review): kstrdup() below is unchecked */
+	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) {
+		if (!strcasecmp(key, hash_cur->key)) {
+			hash_cur->value = value;
+			return 0;
+		}
+	}
+	new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL);
+	if (!new_entry)
+		return -ENOMEM;
+	new_entry->key = kstrdup(key, GFP_KERNEL);
+	new_entry->value = value;
+	hash_add(pkgl_dat->package_to_appid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static void remove_str_to_int(struct hashtable_entry *h_entry) {
+	//printk(KERN_INFO "sdcardfs: %s: %s: %d\n", __func__, (char *)h_entry->key, h_entry->value);
+	kfree(h_entry->key);
+	kmem_cache_free(hashtable_entry_cachep, h_entry);
+}
+
+static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) {
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	struct hlist_node *h_n;
+
+	/* key is an integer carried in a pointer: compared and hashed by value, never dereferenced */
+	hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist,
+					(unsigned int)key, h_n) {
+		if (key == hash_cur->key) {
+			hash_cur->value = value;
+			return 0;
+		}
+	}
+	new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL);
+	if (!new_entry)
+		return -ENOMEM;
+	new_entry->key = key;
+	new_entry->value = value;
+	hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist,
+			(unsigned int)new_entry->key);
+	return 0;
+}
+
+static void remove_int_to_null(struct hashtable_entry *h_entry) {
+	//printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)h_entry->key, h_entry->value);
+	kmem_cache_free(hashtable_entry_cachep, h_entry);
+}
+
+static void remove_all_hashentrys(struct packagelist_data *pkgl_dat)
+{
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_n;
+	struct hlist_node *h_t;
+	int i;
+
+	hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist, h_n)
+		remove_str_to_int(hash_cur);
+	hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist, h_n)
+                remove_int_to_null(hash_cur);
+
+	hash_init(pkgl_dat->package_to_appid);
+	hash_init(pkgl_dat->appid_with_rw);
+}
+
+/* Rebuild both hash tables from packages.list while holding hashtable_lock.
+ * Returns 0 on success or a negative errno from open or hash insertion. */
+static int read_package_list(struct packagelist_data *pkgl_dat) {
+	int ret = 0;
+	int fd;
+	int read_amount;
+
+	printk(KERN_INFO "sdcardfs: read_package_list\n");
+
+	mutex_lock(&pkgl_dat->hashtable_lock);
+
+	remove_all_hashentrys(pkgl_dat);
+
+	fd = sys_open(kpackageslist_file, O_RDONLY, 0);
+	if (fd < 0) {
+		printk(KERN_ERR "sdcardfs: failed to open package list\n");
+		ret = fd;
+		goto out_unlock;
+	}
+
+	/* read one byte less than the buffer so each line can be NUL-terminated */
+	while ((read_amount = sys_read(fd, pkgl_dat->read_buf,
+					sizeof(pkgl_dat->read_buf) - 1)) > 0) {
+		int appid;
+		char *token;
+		int one_line_len = 0;
+		int additional_read;
+		unsigned long ret_gid;
+
+		while (one_line_len < read_amount) {
+			if (pkgl_dat->read_buf[one_line_len] == '\n') {
+				one_line_len++;
+				break;
+			}
+			one_line_len++;
+		}
+		additional_read = read_amount - one_line_len;
+		if (additional_read > 0)
+			sys_lseek(fd, -additional_read, SEEK_CUR);
+		/* keep sscanf() from running into the next line or stale bytes */
+		pkgl_dat->read_buf[one_line_len] = '\0';
+
+		if (sscanf(pkgl_dat->read_buf, "%s %d %*d %*s %*s %s",
+				pkgl_dat->app_name_buf, &appid,
+				pkgl_dat->gids_buf) != 3)
+			continue;
+		ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid);
+		if (ret)
+			goto out_close;
+		token = strtok_r(pkgl_dat->gids_buf, ",", &pkgl_dat->strtok_last);
+		while (token != NULL) {
+			if (!kstrtoul(token, 10, &ret_gid) &&
+					(ret_gid == pkgl_dat->write_gid)) {
+				ret = insert_int_to_null(pkgl_dat, (void *)appid, 1);
+				if (ret)
+					goto out_close;
+				break;
+			}
+			token = strtok_r(NULL, ",", &pkgl_dat->strtok_last);
+		}
+	}
+
+out_close:
+	sys_close(fd);
+out_unlock:
+	mutex_unlock(&pkgl_dat->hashtable_lock);
+	return ret;
+}
+
+static int packagelist_reader(void *thread_data)
+{
+	struct packagelist_data *pkgl_dat = (struct packagelist_data *)thread_data;
+	struct inotify_event *event;
+	bool active = false;
+	int event_pos;
+	int event_size;
+	int res = 0;
+	int nfd;
+
+	allow_signal(SIGINT);
+
+	nfd = sys_inotify_init();
+	if (nfd < 0) {
+		printk(KERN_ERR "sdcardfs: inotify_init failed: %d\n", nfd);
+		return nfd;
+	}
+
+	while (!kthread_should_stop()) {
+		if (signal_pending(current)) {
+			ssleep(1);
+			continue;
+		}
+
+		if (!active) {
+			res = sys_inotify_add_watch(nfd, kpackageslist_file, IN_DELETE_SELF);
+			if (res < 0) {
+				if (res == -ENOENT || res == -EACCES) {
+				/* Framework may not have created yet, sleep and retry */
+					printk(KERN_ERR "sdcardfs: missing packages.list; retrying\n");
+					ssleep(2);
+					printk(KERN_ERR "sdcardfs: missing packages.list_end; retrying\n");
+					continue;
+				} else {
+					printk(KERN_ERR "sdcardfs: inotify_add_watch failed: %d\n", res);
+					goto interruptable_sleep;
+				}
+			}
+			/* Watch above will tell us about any future changes, so
+			 * read the current state. */
+			res = read_package_list(pkgl_dat);
+			if (res) {
+				printk(KERN_ERR "sdcardfs: read_package_list failed: %d\n", res);
+				goto interruptable_sleep;
+			}
+			active = true;
+		}
+
+		event_pos = 0;
+		res = sys_read(nfd, pkgl_dat->event_buf, sizeof(pkgl_dat->event_buf));
+		if (res < (int) sizeof(*event)) {
+			if (res == -EINTR)
+				continue;
+			printk(KERN_ERR "sdcardfs: failed to read inotify event: %d\n", res);
+			goto interruptable_sleep;
+		}
+
+		while (res >= (int) sizeof(*event)) {
+			event = (struct inotify_event *) (pkgl_dat->event_buf + event_pos);
+
+			printk(KERN_INFO "sdcardfs: inotify event: %08x\n", event->mask);
+			if ((event->mask & IN_IGNORED) == IN_IGNORED) {
+				/* Previously watched file was deleted, probably due to move
+				 * that swapped in new data; re-arm the watch and read. */
+				active = false;
+			}
+
+			event_size = sizeof(*event) + event->len;
+			res -= event_size;
+			event_pos += event_size;
+		}
+		continue;
+
+interruptable_sleep:
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	flush_signals(current);
+	sys_close(nfd);
+	return res;
+}
+
+/* Allocate a packagelist for write_gid and start its "pkgld" reader thread.
+ * Returns the new packagelist as an opaque pointer, or an ERR_PTR on failure. */
+void * packagelist_create(gid_t write_gid)
+{
+	struct packagelist_data *pkgl_dat;
+	struct task_struct *packagelist_thread;
+
+	pkgl_dat = kzalloc(sizeof(*pkgl_dat), GFP_KERNEL);
+	if (!pkgl_dat) {
+		printk(KERN_ERR "sdcardfs: allocating packagelist data failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mutex_init(&pkgl_dat->hashtable_lock);
+	hash_init(pkgl_dat->package_to_appid);
+	hash_init(pkgl_dat->appid_with_rw);
+	pkgl_dat->write_gid = write_gid;
+
+	packagelist_thread = kthread_run(packagelist_reader, (void *)pkgl_dat, "pkgld");
+	if (IS_ERR(packagelist_thread)) {
+		printk(KERN_ERR "sdcardfs: creating kthread failed\n");
+		kfree(pkgl_dat);
+		return packagelist_thread;
+	}
+	pkgl_dat->thread_id = packagelist_thread;
+	printk(KERN_INFO "sdcardfs: created packagelist pkgld/%d\n",
+				(int)pkgl_dat->thread_id->pid);
+	return (void *)pkgl_dat;
+}
+
+void packagelist_destroy(void *pkgl_id)
+{
+	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
+	pid_t pkgl_pid = pkgl_dat->thread_id->pid;
+
+	force_sig_info(SIGINT, SEND_SIG_PRIV, pkgl_dat->thread_id);
+	kthread_stop(pkgl_dat->thread_id);
+	remove_all_hashentrys(pkgl_dat);
+	printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld/%d\n", (int)pkgl_pid);
+	kfree(pkgl_dat);
+}
+
+int packagelist_init(void)
+{
+	hashtable_entry_cachep =
+		kmem_cache_create("packagelist_hashtable_entry",
+					sizeof(struct hashtable_entry), 0, 0, NULL);
+	if (!hashtable_entry_cachep) {
+		printk(KERN_ERR "sdcardfs: failed creating pkgl_hashtable entry slab cache\n");
+		return -ENOMEM;
+	}
+
+        return 0;
+}
+
+void packagelist_exit(void)
+{
+	if (hashtable_entry_cachep)
+		kmem_cache_destroy(hashtable_entry_cachep);
+}
+
+
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
new file mode 100644
index 000000000000..90f8b24e4a52
--- /dev/null
+++ b/fs/sdcardfs/sdcardfs.h
@@ -0,0 +1,493 @@
+/*
+ * fs/sdcardfs/sdcardfs.h
+ *
+ * The sdcardfs v2.0
+ *   This file system replaces the sdcard daemon on Android
+ *   On version 2.0, some of the daemon functions have been ported
+ *   to support the multi-user concepts of Android 4.4
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#ifndef _SDCARDFS_H_
+#define _SDCARDFS_H_
+
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/fs_stack.h>
+#include <linux/magic.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/security.h>
+#include <linux/string.h>
+#include "multiuser.h"
+
+/* the file system name */
+#define SDCARDFS_NAME "sdcardfs"
+
+/* sdcardfs root inode number */
+#define SDCARDFS_ROOT_INO     1
+
+/* useful for tracking code reachability */
+#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
+
+#define SDCARDFS_DIRENT_SIZE 256
+
+/* temporary static uid settings for development */
+#define AID_ROOT             0	/* uid for accessing /mnt/sdcard & extSdcard */
+#define AID_MEDIA_RW      1023	/* internal media storage write access */
+
+#define AID_SDCARD_RW     1015	/* external storage write access */
+#define AID_SDCARD_R      1028	/* external storage read access */
+#define AID_SDCARD_PICS   1033	/* external storage photos access */
+#define AID_SDCARD_AV     1034	/* external storage audio/video access */
+#define AID_SDCARD_ALL    1035	/* access all users external storage */
+
+#define AID_PACKAGE_INFO  1027
+
+#define fix_derived_permission(x)	\
+	do {						\
+		(x)->i_uid = SDCARDFS_I(x)->d_uid;	\
+		(x)->i_gid = SDCARDFS_I(x)->d_gid;	\
+		(x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\
+	} while (0)
+
+/* OVERRIDE_CRED() and REVERT_CRED()
+ *	OVERRIDE_CRED()
+ *		backs up the original task->cred
+ *		and sets task->cred->fsuid/fsgid to the specified values.
+ *	REVERT_CRED()
+ *		restores the original task->cred->fsuid/fsgid.
+ * These two macros must be used as a pair, and OVERRIDE_CRED() should be
+ * placed at the beginning of a function, right after variable declarations.
+ */
+#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred)		\
+	saved_cred = override_fsids(sdcardfs_sbi);	\
+	if (!saved_cred) { return -ENOMEM; }
+
+#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred)	\
+	saved_cred = override_fsids(sdcardfs_sbi);	\
+	if (!saved_cred) { return ERR_PTR(-ENOMEM); }
+
+#define REVERT_CRED(saved_cred)	revert_fsids(saved_cred)
+
+#define DEBUG_CRED()		\
+	printk("KAKJAGI: %s:%d fsuid %d fsgid %d\n", 	\
+		__FUNCTION__, __LINE__, 		\
+		(int)current->cred->fsuid, 		\
+		(int)current->cred->fsgid);
+
+/* Android 4.4 support */
+
+/* Permission mode for a specific node. Controls how file permissions
+ * are derived for children nodes. */
+typedef enum {
+	/* Nothing special; this node should just inherit from its parent. */
+	PERM_INHERIT,
+	/* This node is one level above a normal root; used for legacy layouts
+	 * which use the first level to represent user_id. */
+	PERM_LEGACY_PRE_ROOT,
+	/* This node is "/" */
+	PERM_ROOT,
+	/* This node is "/Android" */
+	PERM_ANDROID,
+	/* This node is "/Android/data" */
+	PERM_ANDROID_DATA,
+	/* This node is "/Android/obb" */
+	PERM_ANDROID_OBB,
+	/* This node is "/Android/user" */
+	PERM_ANDROID_USER,
+} perm_t;
+
+/* Permissions structure to derive */
+typedef enum {
+	DERIVE_NONE,
+	DERIVE_LEGACY,
+	DERIVE_UNIFIED,
+} derive_t;
+
+typedef enum {
+	LOWER_FS_EXT4,
+	LOWER_FS_FAT,
+} lower_fs_t;
+
+struct sdcardfs_sb_info;
+struct sdcardfs_mount_options;
+
+/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
+const struct cred * override_fsids(struct sdcardfs_sb_info* sbi);
+/* Do not directly use this function, use REVERT_CRED() instead. */
+void revert_fsids(const struct cred * old_cred);
+
+/* operations vectors defined in specific files */
+extern const struct file_operations sdcardfs_main_fops;
+extern const struct file_operations sdcardfs_dir_fops;
+extern const struct inode_operations sdcardfs_main_iops;
+extern const struct inode_operations sdcardfs_dir_iops;
+extern const struct inode_operations sdcardfs_symlink_iops;
+extern const struct super_operations sdcardfs_sops;
+extern const struct dentry_operations sdcardfs_ci_dops;
+extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops;
+extern const struct vm_operations_struct sdcardfs_vm_ops;
+
+extern int sdcardfs_init_inode_cache(void);
+extern void sdcardfs_destroy_inode_cache(void);
+extern int sdcardfs_init_dentry_cache(void);
+extern void sdcardfs_destroy_dentry_cache(void);
+extern int new_dentry_private_data(struct dentry *dentry);
+extern void free_dentry_private_data(struct dentry *dentry);
+extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+				    struct nameidata *nd);
+extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+			    struct path *lower_path);
+
+/* file private data */
+struct sdcardfs_file_info {
+	struct file *lower_file;
+	const struct vm_operations_struct *lower_vm_ops;
+};
+
+/* sdcardfs inode data in memory */
+struct sdcardfs_inode_info {
+	struct inode *lower_inode;	/* read/written via sdcardfs_lower_inode()/sdcardfs_set_lower_inode() */
+	/* state derived based on current position in hierarchy
+	 * caution: d_mode does not include file types
+	 */
+	perm_t perm;
+	userid_t userid;
+	uid_t d_uid;	/* copied into i_uid by fix_derived_permission() */
+	gid_t d_gid;	/* copied into i_gid by fix_derived_permission() */
+	mode_t d_mode;	/* merged with (i_mode & S_IFMT) by fix_derived_permission() */
+
+	struct inode vfs_inode;	/* embedded; SDCARDFS_I() recovers the container from it */
+};
+
+/* sdcardfs dentry data in memory */
+struct sdcardfs_dentry_info {
+	spinlock_t lock;	/* protects lower_path */
+	struct path lower_path;
+	struct path orig_path;
+};
+
+struct sdcardfs_mount_options {
+	uid_t fs_low_uid;
+	gid_t fs_low_gid;
+	gid_t write_gid;
+	int split_perms;
+	derive_t derive;
+	lower_fs_t lower_fs;
+	unsigned int reserved_mb;
+};
+
+/* sdcardfs super-block data in memory */
+struct sdcardfs_sb_info {
+	struct super_block *lower_sb;
+	/* derived perm policy : some of options have been added
+	 * to sdcardfs_mount_options (Android 4.4 support) */
+	struct sdcardfs_mount_options options;
+	spinlock_t lock;	/* protects obbpath */
+	char *obbpath_s;
+	struct path obbpath;
+	void *pkgl_id;
+};
+
+/*
+ * inode to private data
+ *
+ * Since we use containers and the struct inode is _inside_ the
+ * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL
+ * inode pointer), return a valid non-NULL pointer.
+ */
+static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode)
+{
+	return container_of(inode, struct sdcardfs_inode_info, vfs_inode);
+}
+
+/* dentry to private data */
+#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata)
+
+/* superblock to private data */
+#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info)
+
+/* file to private Data */
+#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data))
+
+/* file to lower file */
+static inline struct file *sdcardfs_lower_file(const struct file *f)
+{
+	return SDCARDFS_F(f)->lower_file;
+}
+
+static inline void sdcardfs_set_lower_file(struct file *f, struct file *val)
+{
+	SDCARDFS_F(f)->lower_file = val;
+}
+
+/* inode to lower inode. */
+static inline struct inode *sdcardfs_lower_inode(const struct inode *i)
+{
+	return SDCARDFS_I(i)->lower_inode;
+}
+
+static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val)
+{
+	SDCARDFS_I(i)->lower_inode = val;
+}
+
+/* superblock to lower superblock */
+static inline struct super_block *sdcardfs_lower_super(
+	const struct super_block *sb)
+{
+	return SDCARDFS_SB(sb)->lower_sb;
+}
+
+static inline void sdcardfs_set_lower_super(struct super_block *sb,
+					  struct super_block *val)
+{
+	SDCARDFS_SB(sb)->lower_sb = val;
+}
+
+/* Copy src's dentry and mnt pointers into dst; no reference is taken here. */
+static inline void pathcpy(struct path *dst, const struct path *src)
+{
+	dst->dentry = src->dentry;
+	dst->mnt = src->mnt;
+}
+
+/* The sdcardfs_get_<pname>() helpers call path_get(); the caller must
+ * therefore release the path with the matching sdcardfs_put_<pname>().
+ */
+#define SDCARDFS_DENT_FUNC(pname) \
+static inline void sdcardfs_get_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	pathcpy(pname, &SDCARDFS_D(dent)->pname); \
+	path_get(pname); \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_put_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	path_put(pname); \
+	return; \
+} \
+static inline void sdcardfs_set_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	pathcpy(&SDCARDFS_D(dent)->pname, pname); \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_reset_##pname(const struct dentry *dent) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	SDCARDFS_D(dent)->pname.dentry = NULL; \
+	SDCARDFS_D(dent)->pname.mnt = NULL; \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
+{ \
+	struct path pname; \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	if(SDCARDFS_D(dent)->pname.dentry) { \
+		pathcpy(&pname, &SDCARDFS_D(dent)->pname); \
+		SDCARDFS_D(dent)->pname.dentry = NULL; \
+		SDCARDFS_D(dent)->pname.mnt = NULL; \
+		spin_unlock(&SDCARDFS_D(dent)->lock); \
+		path_put(&pname); \
+	} else \
+		spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+}
+
+SDCARDFS_DENT_FUNC(lower_path)
+SDCARDFS_DENT_FUNC(orig_path)
+
+static inline int has_graft_path(const struct dentry *dent)
+{
+	struct sdcardfs_dentry_info *info = SDCARDFS_D(dent);
+	int grafted;
+
+	spin_lock(&info->lock);
+	grafted = (info->orig_path.dentry != NULL);
+	spin_unlock(&info->lock);
+
+	return grafted;
+}
+
+static inline void sdcardfs_get_real_lower(const struct dentry *dent,
+						struct path *real_lower)
+{
+	/* only a local obb dentry (one with a graft path) uses orig_path */
+	if (!has_graft_path(dent)) {
+		sdcardfs_get_lower_path(dent, real_lower);
+		return;
+	}
+
+	sdcardfs_get_orig_path(dent, real_lower);
+}
+
+static inline void sdcardfs_put_real_lower(const struct dentry *dent,
+						struct path *real_lower)
+{
+	if (!has_graft_path(dent))	/* mirrors the choice in sdcardfs_get_real_lower() */
+		sdcardfs_put_lower_path(dent, real_lower);
+	else
+		sdcardfs_put_orig_path(dent, real_lower);
+}
+
+/* for packagelist.c */
+extern int get_caller_has_rw_locked(void *pkgl_id, derive_t derive);
+extern appid_t get_appid(void *pkgl_id, const char *app_name);
+extern int check_caller_access_to_name(struct inode *parent_node, const char* name,
+                                        derive_t derive, int w_ok, int has_rw);
+extern int open_flags_to_access_mode(int open_flags);
+extern void * packagelist_create(gid_t write_gid);
+extern void packagelist_destroy(void *pkgl_id);
+extern int packagelist_init(void);
+extern void packagelist_exit(void);
+
+/* for derived_perm.c */
+extern void setup_derived_state(struct inode *inode, perm_t perm,
+			userid_t userid, uid_t uid, gid_t gid, mode_t mode);
+extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
+extern void update_derived_permission(struct dentry *dentry);
+extern int need_graft_path(struct dentry *dentry);
+extern int is_base_obbpath(struct dentry *dentry);
+extern int is_obbpath_invalid(struct dentry *dentry);
+extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path);
+
+/* locking helpers */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *dir = dget_parent(dentry);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+	return dir;
+}
+
+static inline void unlock_dir(struct dentry *dir)
+{
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(dir);
+}
+
+static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode)
+{
+	int err;
+	struct dentry *dent;
+	struct iattr attrs;
+	struct nameidata nd;
+
+	err = kern_path_parent(path_s, &nd);
+	if (err) {
+		if (err == -EEXIST)
+			err = 0;
+		goto out;
+	}
+
+	dent = lookup_create(&nd, 1);
+	if (IS_ERR(dent)) {
+		err = PTR_ERR(dent);
+		if (err == -EEXIST)
+			err = 0;
+		goto out_unlock;
+	}
+
+	err = vfs_mkdir(nd.path.dentry->d_inode, dent, mode);
+	if (err) {
+		if (err == -EEXIST)
+			err = 0;
+		goto out_dput;
+	}
+
+	attrs.ia_uid = uid;
+	attrs.ia_gid = gid;
+	attrs.ia_valid = ATTR_UID | ATTR_GID;
+	mutex_lock(&dent->d_inode->i_mutex);
+	notify_change(dent, &attrs);
+	mutex_unlock(&dent->d_inode->i_mutex);
+
+out_dput:
+	dput(dent);
+
+out_unlock:
+	/* parent dentry locked by lookup_create */
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+	path_put(&nd.path);
+
+out:
+	return err;
+}
+
+/*
+ * Return 1, if a disk has enough free space, otherwise 0.
+ * We assume that any files can not be overwritten.
+ */
+static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir)
+{
+	int err;
+	struct path lower_path;
+	struct kstatfs statfs;
+	u64 avail;
+	u64 reserved;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	/* No reservation configured: every request fits. */
+	if (!sbi->options.reserved_mb)
+		return 1;
+
+	/* Get fs stat of lower filesystem. */
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_statfs(&lower_path, &statfs);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+
+	/* A failed statfs or a zero block size is reported as "no space". */
+	if (unlikely(err) || unlikely(statfs.f_bsize == 0))
+		return 0;
+
+	/* if you are checking directory, set size to f_bsize. */
+	if (unlikely(dir))
+		size = statfs.f_bsize;
+
+	/* available size */
+	avail = statfs.f_bavail * statfs.f_bsize;
+
+	/* not enough space */
+	if ((u64)size > avail)
+		return 0;
+
+	/*
+	 * Widen before scaling: reserved_mb is an unsigned int, so
+	 * reserved_mb * 1024 * 1024 wraps once the reserve reaches 4096 MB.
+	 */
+	reserved = (u64)sbi->options.reserved_mb * 1024 * 1024;
+	return (avail - size) > reserved;
+}
+
+#endif	/* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/strtok.h b/fs/sdcardfs/strtok.h
new file mode 100644
index 000000000000..50ab25aa0bc4
--- /dev/null
+++ b/fs/sdcardfs/strtok.h
@@ -0,0 +1,75 @@
+/*
+ * fs/sdcardfs/strtok.h
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+static char *
+strtok_r(char *s, const char *delim, char **last)
+{
+        char *spanp;
+        int c, sc;
+        char *tok;
+
+        /* Re-entrant tokenizer: each token is NUL-terminated in place. */
+        /* A NULL s continues from the position saved in *last; a NULL
+         * saved position means an earlier call consumed the string. */
+        if (s == NULL) {
+                s = *last;
+                if (s == NULL)
+                        return NULL;
+        }
+
+        /*
+         * Skip (span) leading delimiters (s += strspn(s, delim), sort of).
+         */
+cont:
+        c = *s++;
+        for (spanp = (char *)delim; (sc = *spanp++) != 0;) {
+                if (c == sc)
+                        goto cont;
+        }
+
+        if (c == 0) {           /* no non-delimiter characters */
+                *last = NULL;
+                return NULL;
+        }
+        tok = s - 1;
+
+        /*
+         * Scan token (scan for delimiters: s += strcspn(s, delim), sort of).
+         * Note that delim must have one NUL; we stop if we see that, too.
+         */
+        for (;;) {
+                c = *s++;
+                spanp = (char *)delim;
+                do {
+                        sc = *spanp++;
+                        if (sc == c) {
+                                if (c == 0)
+                                        s = NULL;
+                                else
+                                        s[-1] = 0;
+                                *last = s;
+                                return tok;
+                        }
+                } while (sc != 0);
+        }
+
+        /* NOTREACHED: the loop above only exits through its return */
+}
+
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
new file mode 100644
index 000000000000..1d206c82dfdf
--- /dev/null
+++ b/fs/sdcardfs/super.c
@@ -0,0 +1,229 @@
+/*
+ * fs/sdcardfs/super.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/*
+ * The inode cache is used with alloc_inode for both our inode info and the
+ * vfs inode.
+ */
+static struct kmem_cache *sdcardfs_inode_cachep;
+
+/* final actions when unmounting a file system */
+static void sdcardfs_put_super(struct super_block *sb)
+{
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb);
+	struct super_block *lower_sb;
+
+	/* no private data attached: nothing to release */
+	if (sbi == NULL)
+		return;
+
+	if (sbi->obbpath_s != NULL) {
+		kfree(sbi->obbpath_s);
+		path_put(&sbi->obbpath);
+	}
+
+	/* decrement lower super references */
+	lower_sb = sbi->lower_sb;
+	sbi->lower_sb = NULL;
+	atomic_dec(&lower_sb->s_active);
+
+	if (sbi->pkgl_id != NULL)
+		packagelist_destroy(sbi->pkgl_id);
+
+	kfree(sbi);
+	sb->s_fs_info = NULL;
+}
+
+static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int err;
+	struct path lower_path;
+	u32 min_blocks;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_statfs(&lower_path, buf);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	if (err)	/* the lower statfs failed: report that, leave buf alone */
+		return err;
+
+	if (sbi->options.reserved_mb) {
+		/* Invalid statfs information. */
+		if (buf->f_bsize == 0) {
+			printk(KERN_ERR "Returned block size is zero.\n");
+			return -EINVAL;
+		}
+		/* NOTE(review): 32-bit product; wraps once reserved_mb >= 4096 */
+		min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize);
+		/* Hide the reserve, clamping at 0 rather than wrapping. */
+		buf->f_blocks = (buf->f_blocks > min_blocks) ?
+				buf->f_blocks - min_blocks : 0;
+		buf->f_bavail = (buf->f_bavail > min_blocks) ?
+				buf->f_bavail - min_blocks : 0;
+
+		/* Make reserved blocks invisible to media storage */
+		buf->f_bfree = buf->f_bavail;
+	}
+
+	/* set return buf to our f/s to avoid confusing user-level utils */
+	buf->f_type = SDCARDFS_SUPER_MAGIC;
+	return 0;
+}
+
+/*
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options)
+{
+	const int supported = MS_RDONLY | MS_MANDLOCK | MS_SILENT;
+
+	/*
+	 * "ro", "rw" and similar flags are taken care of by the VFS.
+	 * RDONLY and MANDLOCK can safely be accepted here and SILENT
+	 * is honored; any other flag still set in *flags makes the
+	 * remount request an error.
+	 */
+	if ((*flags & ~supported) == 0)
+		return 0;
+
+	printk(KERN_ERR
+	       "sdcardfs: remount flags 0x%x unsupported\n", *flags);
+	return -EINVAL;
+}
+
+/*
+ * Called by iput() when the inode reference count reached zero
+ * and the inode is not hashed anywhere.  Used to clear anything
+ * that needs to be, before the inode is completely destroyed and put
+ * on the inode free list.
+ */
+static void sdcardfs_evict_inode(struct inode *inode)
+{
+	struct inode *lower_inode;
+
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+	/*
+	 * Decrement a reference to a lower_inode, which was incremented
+	 * by our read_inode when it was created initially.
+	 */
+	lower_inode = sdcardfs_lower_inode(inode);
+	sdcardfs_set_lower_inode(inode, NULL);
+	iput(lower_inode);
+}
+
+static struct inode *sdcardfs_alloc_inode(struct super_block *sb)
+{
+	struct sdcardfs_inode_info *info;
+
+	info = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	/* zero only the private fields that precede the embedded vfs inode */
+	memset(info, 0, offsetof(struct sdcardfs_inode_info, vfs_inode));
+
+	info->vfs_inode.i_version = 1;
+	return &info->vfs_inode;
+}
+
+static void sdcardfs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode));
+}
+
+/* sdcardfs inode cache constructor */
+static void init_once(void *obj)
+{
+	struct sdcardfs_inode_info *i = obj;
+
+	inode_init_once(&i->vfs_inode);
+}
+
+int sdcardfs_init_inode_cache(void)
+{
+	sdcardfs_inode_cachep =
+		kmem_cache_create("sdcardfs_inode_cache",
+				  sizeof(struct sdcardfs_inode_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, init_once);
+	/* a NULL cache is the only failure reported, as -ENOMEM */
+	if (sdcardfs_inode_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* sdcardfs inode cache destructor */
+void sdcardfs_destroy_inode_cache(void)
+{
+	if (sdcardfs_inode_cachep)
+		kmem_cache_destroy(sdcardfs_inode_cachep);
+}
+
+/*
+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
+ * code can actually succeed and won't leave tasks that need handling.
+ */
+static void sdcardfs_umount_begin(struct super_block *sb)
+{
+	struct super_block *lower_sb;
+
+	lower_sb = sdcardfs_lower_super(sb);
+	if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin)
+		lower_sb->s_op->umount_begin(lower_sb);
+}
+
+static int sdcardfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(mnt->mnt_sb);
+	struct sdcardfs_mount_options *opts = &sbi->options;
+
+	if (opts->fs_low_uid != 0)
+		seq_printf(m, ",uid=%u", opts->fs_low_uid);
+	if (opts->fs_low_gid != 0)
+		seq_printf(m, ",gid=%u", opts->fs_low_gid);
+
+	if (opts->derive == DERIVE_NONE)
+		seq_printf(m, ",derive=none");
+	else if (opts->derive == DERIVE_LEGACY)
+		seq_printf(m, ",derive=legacy");
+	else if (opts->derive == DERIVE_UNIFIED)
+		seq_printf(m, ",derive=unified");
+
+	if (opts->reserved_mb != 0)
+		seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
+
+	return 0;
+};
+
+const struct super_operations sdcardfs_sops = {
+	.put_super	= sdcardfs_put_super,
+	.statfs		= sdcardfs_statfs,
+	.remount_fs	= sdcardfs_remount_fs,
+	.evict_inode	= sdcardfs_evict_inode,
+	.umount_begin	= sdcardfs_umount_begin,
+	.show_options	= sdcardfs_show_options,
+	.alloc_inode	= sdcardfs_alloc_inode,
+	.destroy_inode	= sdcardfs_destroy_inode,
+	.drop_inode	= generic_delete_inode,
+};
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d8c6334cd150..ef3b4f74eaf0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -43,6 +43,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
+#ifdef CONFIG_SDCARD_FS_CI_SEARCH
+#define LOOKUP_CASE_INSENSITIVE 0x8000
+#endif
 
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index accb036bbc9c..cfb5c406f344 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -52,6 +52,8 @@
 #define REISER2FS_SUPER_MAGIC_STRING	"ReIsEr2Fs"
 #define REISER2FS_JR_SUPER_MAGIC_STRING	"ReIsEr3Fs"
 
+#define SDCARDFS_SUPER_MAGIC	0xb550ca10
+
 #define SMB_SUPER_MAGIC		0x517B
 #define CGROUP_SUPER_MAGIC	0x27e0eb
 

From d2d6d73bc37156b6402fdaef787e0be86b6d5901 Mon Sep 17 00:00:00 2001
From: Daniel Campello <campello@google.com>
Date: Mon, 20 Jul 2015 16:27:37 -0700
Subject: [PATCH 038/797] Port of sdcardfs to 4.4

Change-Id: I25b99ecf214e72ebf6a57ec3085972542a8d7951
Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/sdcardfs/Kconfig       |   1 -
 fs/sdcardfs/dentry.c      |   9 +-
 fs/sdcardfs/file.c        |  47 ++++---
 fs/sdcardfs/hashtable.h   | 190 --------------------------
 fs/sdcardfs/inode.c       | 280 +++++++++++++++-----------------------
 fs/sdcardfs/lookup.c      |  25 ++--
 fs/sdcardfs/main.c        | 113 ++++++++-------
 fs/sdcardfs/mmap.c        |   5 +-
 fs/sdcardfs/packagelist.c |  39 +++---
 fs/sdcardfs/sdcardfs.h    |  41 +++---
 fs/sdcardfs/super.c       |   6 +-
 include/linux/namei.h     |   2 +
 12 files changed, 252 insertions(+), 506 deletions(-)
 delete mode 100644 fs/sdcardfs/hashtable.h

diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
index 657f4958e8d6..d995f3eaae6d 100644
--- a/fs/sdcardfs/Kconfig
+++ b/fs/sdcardfs/Kconfig
@@ -1,6 +1,5 @@
 config SDCARD_FS
 	tristate "sdcard file system"
-	depends on EXPERIMENTAL
 	default n
 	help
 	  Sdcardfs is based on Wrapfs file system.
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
index 4572a5403bb2..dbbcfd091fc7 100644
--- a/fs/sdcardfs/dentry.c
+++ b/fs/sdcardfs/dentry.c
@@ -26,7 +26,7 @@
  *          0: tell VFS to invalidate dentry
  *          1: dentry is valid
  */
-static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	int err = 1;
 	struct path parent_lower_path, lower_path;
@@ -35,7 +35,7 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct dentry *lower_cur_parent_dentry = NULL;
 	struct dentry *lower_dentry = NULL;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	spin_lock(&dentry->d_lock);
@@ -119,7 +119,7 @@ static void sdcardfs_d_release(struct dentry *dentry)
 }
 
 static int sdcardfs_hash_ci(const struct dentry *dentry,
-				const struct inode *inode, struct qstr *qstr)
+				struct qstr *qstr)
 {
 	/*
 	 * This function is copy of vfat_hashi.
@@ -148,8 +148,7 @@ static int sdcardfs_hash_ci(const struct dentry *dentry,
  * Case insensitive compare of two vfat names.
  */
 static int sdcardfs_cmp_ci(const struct dentry *parent,
-		const struct inode *pinode,
-		const struct dentry *dentry, const struct inode *inode,
+		const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
 	/* This function is copy of vfat_cmpi */
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
index bcacb947c874..f9c5eaafc619 100644
--- a/fs/sdcardfs/file.c
+++ b/fs/sdcardfs/file.c
@@ -50,8 +50,8 @@ static ssize_t sdcardfs_read(struct file *file, char __user *buf,
 	err = vfs_read(lower_file, buf, count, ppos);
 	/* update our inode atime upon a successful lower read */
 	if (err >= 0)
-		fsstack_copy_attr_atime(dentry->d_inode,
-					lower_file->f_path.dentry->d_inode);
+		fsstack_copy_attr_atime(d_inode(dentry),
+					file_inode(lower_file));
 
 	return err;
 }
@@ -59,7 +59,7 @@ static ssize_t sdcardfs_read(struct file *file, char __user *buf,
 static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	int err = 0;
+	int err;
 	struct file *lower_file;
 	struct dentry *dentry = file->f_path.dentry;
 
@@ -73,29 +73,29 @@ static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
 	err = vfs_write(lower_file, buf, count, ppos);
 	/* update our inode times+sizes upon a successful lower write */
 	if (err >= 0) {
-		fsstack_copy_inode_size(dentry->d_inode,
-					lower_file->f_path.dentry->d_inode);
-		fsstack_copy_attr_times(dentry->d_inode,
-					lower_file->f_path.dentry->d_inode);
+		fsstack_copy_inode_size(d_inode(dentry),
+					file_inode(lower_file));
+		fsstack_copy_attr_times(d_inode(dentry),
+					file_inode(lower_file));
 	}
 
 	return err;
 }
 
-static int sdcardfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+static int sdcardfs_readdir(struct file *file, struct dir_context *ctx)
 {
-	int err = 0;
+	int err;
 	struct file *lower_file = NULL;
 	struct dentry *dentry = file->f_path.dentry;
 
 	lower_file = sdcardfs_lower_file(file);
 
 	lower_file->f_pos = file->f_pos;
-	err = vfs_readdir(lower_file, filldir, dirent);
+	err = iterate_dir(lower_file, ctx);
 	file->f_pos = lower_file->f_pos;
 	if (err >= 0)		/* copy the atime */
-		fsstack_copy_attr_atime(dentry->d_inode,
-					lower_file->f_path.dentry->d_inode);
+		fsstack_copy_attr_atime(d_inode(dentry),
+					file_inode(lower_file));
 	return err;
 }
 
@@ -191,7 +191,6 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
 	 */
 	file_accessed(file);
 	vma->vm_ops = &sdcardfs_vm_ops;
-	vma->vm_flags |= VM_CAN_NONLINEAR;
 
 	file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
 	if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
@@ -242,8 +241,8 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
 
 	/* open lower object and link sdcardfs's file struct to lower's */
 	sdcardfs_get_lower_path(file->f_path.dentry, &lower_path);
-	lower_file = dentry_open(lower_path.dentry, lower_path.mnt,
-				 file->f_flags, current_cred());
+	lower_file = dentry_open(&lower_path, file->f_flags, current_cred());
+	path_put(&lower_path);
 	if (IS_ERR(lower_file)) {
 		err = PTR_ERR(lower_file);
 		lower_file = sdcardfs_lower_file(file);
@@ -275,8 +274,10 @@ static int sdcardfs_flush(struct file *file, fl_owner_t id)
 	struct file *lower_file = NULL;
 
 	lower_file = sdcardfs_lower_file(file);
-	if (lower_file && lower_file->f_op && lower_file->f_op->flush)
+	if (lower_file && lower_file->f_op && lower_file->f_op->flush) {
+		filemap_write_and_wait(file->f_mapping);
 		err = lower_file->f_op->flush(lower_file, id);
+	}
 
 	return err;
 }
@@ -296,19 +297,23 @@ static int sdcardfs_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int
-sdcardfs_fsync(struct file *file, int datasync)
+static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end,
+			int datasync)
 {
 	int err;
 	struct file *lower_file;
 	struct path lower_path;
 	struct dentry *dentry = file->f_path.dentry;
 
+	err = __generic_file_fsync(file, start, end, datasync);
+	if (err)
+		goto out;
+
 	lower_file = sdcardfs_lower_file(file);
 	sdcardfs_get_lower_path(dentry, &lower_path);
-	err = vfs_fsync(lower_file, datasync);
+	err = vfs_fsync_range(lower_file, start, end, datasync);
 	sdcardfs_put_lower_path(dentry, &lower_path);
-
+out:
 	return err;
 }
 
@@ -344,7 +349,7 @@ const struct file_operations sdcardfs_main_fops = {
 const struct file_operations sdcardfs_dir_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
-	.readdir	= sdcardfs_readdir,
+	.iterate	= sdcardfs_readdir,
 	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= sdcardfs_compat_ioctl,
diff --git a/fs/sdcardfs/hashtable.h b/fs/sdcardfs/hashtable.h
deleted file mode 100644
index 1e770f3df148..000000000000
--- a/fs/sdcardfs/hashtable.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Statically sized hash table implementation
- * (C) 2012  Sasha Levin <levinsasha928@gmail.com>
- */
-
-#ifndef _LINUX_HASHTABLE_H
-#define _LINUX_HASHTABLE_H
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/hash.h>
-#include <linux/rculist.h>
-
-#define DEFINE_HASHTABLE(name, bits)                                            \
-        struct hlist_head name[1 << (bits)] =                                   \
-                        { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
-
-#define DECLARE_HASHTABLE(name, bits)                                           \
-        struct hlist_head name[1 << (bits)]
-
-#define HASH_SIZE(name) (ARRAY_SIZE(name))
-#define HASH_BITS(name) ilog2(HASH_SIZE(name))
-
-/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */
-#define hash_min(val, bits)                                                     \
-        (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits))
-
-static inline void __hash_init(struct hlist_head *ht, unsigned int sz)
-{
-        unsigned int i;
-
-        for (i = 0; i < sz; i++)
-                INIT_HLIST_HEAD(&ht[i]);
-}
-
-/**
- * hash_init - initialize a hash table
- * @hashtable: hashtable to be initialized
- *
- * Calculates the size of the hashtable from the given parameter, otherwise
- * same as hash_init_size.
- *
- * This has to be a macro since HASH_BITS() will not work on pointers since
- * it calculates the size during preprocessing.
- */
-#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))
-
-/**
- * hash_add - add an object to a hashtable
- * @hashtable: hashtable to add to
- * @node: the &struct hlist_node of the object to be added
- * @key: the key of the object to be added
- */
-#define hash_add(hashtable, node, key)                                          \
-        hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])
-
-/**
- * hash_add_rcu - add an object to a rcu enabled hashtable
- * @hashtable: hashtable to add to
- * @node: the &struct hlist_node of the object to be added
- * @key: the key of the object to be added
- */
-#define hash_add_rcu(hashtable, node, key)                                      \
-        hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])
-
-/**
- * hash_hashed - check whether an object is in any hashtable
- * @node: the &struct hlist_node of the object to be checked
- */
-static inline bool hash_hashed(struct hlist_node *node)
-{
-        return !hlist_unhashed(node);
-}
-
-static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz)
-{
-        unsigned int i;
-
-        for (i = 0; i < sz; i++)
-                if (!hlist_empty(&ht[i]))
-                        return false;
-
-        return true;
-}
-
-/**
- * hash_empty - check whether a hashtable is empty
- * @hashtable: hashtable to check
- *
- * This has to be a macro since HASH_BITS() will not work on pointers since
- * it calculates the size during preprocessing.
- */
-#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable))
-
-/**
- * hash_del - remove an object from a hashtable
- * @node: &struct hlist_node of the object to remove
- */
-static inline void hash_del(struct hlist_node *node)
-{
-        hlist_del_init(node);
-}
-
-/**
- * hash_del_rcu - remove an object from a rcu enabled hashtable
- * @node: &struct hlist_node of the object to remove
- */
-static inline void hash_del_rcu(struct hlist_node *node)
-{
-        hlist_del_init_rcu(node);
-}
-
-/**
- * hash_for_each - iterate over a hashtable
- * @name: hashtable to iterate
- * @bkt: integer to use as bucket loop cursor
- * @obj: the type * to use as a loop cursor for each entry
- * @member: the name of the hlist_node within the struct
- */
-#define hash_for_each(name, bkt, obj, member, pos)                           \
-        for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
-                        (bkt)++)\
-                hlist_for_each_entry(obj, pos, &name[bkt], member)
-
-/**
- * hash_for_each_rcu - iterate over a rcu enabled hashtable
- * @name: hashtable to iterate
- * @bkt: integer to use as bucket loop cursor
- * @obj: the type * to use as a loop cursor for each entry
- * @member: the name of the hlist_node within the struct
- */
-#define hash_for_each_rcu(name, bkt, obj, member)                       \
-        for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
-                        (bkt)++)\
-                hlist_for_each_entry_rcu(obj, &name[bkt], member)
-
-/**
- * hash_for_each_safe - iterate over a hashtable safe against removal of
- * hash entry
- * @name: hashtable to iterate
- * @bkt: integer to use as bucket loop cursor
- * @tmp: a &struct used for temporary storage
- * @obj: the type * to use as a loop cursor for each entry
- * @member: the name of the hlist_node within the struct
- */
-#define hash_for_each_safe(name, bkt, tmp, obj, member, pos)                 \
-        for ((bkt) = 0, obj = NULL; (bkt) < HASH_SIZE(name);\
-                        (bkt)++)\
-                hlist_for_each_entry_safe(obj, pos, tmp, &name[bkt], member)
-
-/**
- * hash_for_each_possible - iterate over all possible objects hashing to the
- * same bucket
- * @name: hashtable to iterate
- * @obj: the type * to use as a loop cursor for each entry
- * @member: the name of the hlist_node within the struct
- * @key: the key of the objects to iterate over
- */
-#define hash_for_each_possible(name, obj, member, key, pos)                  \
-        hlist_for_each_entry(obj, pos, &name[hash_min(key, HASH_BITS(name))], member)
-
-/**
- * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
- * same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
- * @name: hashtable to iterate
- * @obj: the type * to use as a loop cursor for each entry
- * @member: the name of the hlist_node within the struct
- * @key: the key of the objects to iterate over
- */
-#define hash_for_each_possible_rcu(name, obj, member, key)              \
-        hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\
-                member)
-
-/**
- * hash_for_each_possible_safe - iterate over all possible objects hashing to the
- * same bucket safe against removals
- * @name: hashtable to iterate
- * @obj: the type * to use as a loop cursor for each entry
- * @tmp: a &struct used for temporary storage
- * @member: the name of the hlist_node within the struct
- * @key: the key of the objects to iterate over
- */
-#define hash_for_each_possible_safe(name, obj, tmp, member, key)        \
-        hlist_for_each_entry_safe(obj, tmp,\
-                &name[hash_min(key, HASH_BITS(name))], member)
-
-
-#endif
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index e8ed04250ed1..75c622bac2f5 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -30,8 +30,8 @@ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi)
 	if (!cred)
 		return NULL;
 
-	cred->fsuid = sbi->options.fs_low_uid;
-	cred->fsgid = sbi->options.fs_low_gid;
+	cred->fsuid = make_kuid(&init_user_ns, sbi->options.fs_low_uid);
+	cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid);
 
 	old_cred = override_creds(cred);
 
@@ -49,12 +49,12 @@ void revert_fsids(const struct cred * old_cred)
 }
 
 static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
-			 int mode, struct nameidata *nd)
+			 umode_t mode, bool want_excl)
 {
-	int err = 0;
+	int err;
 	struct dentry *lower_dentry;
 	struct dentry *lower_parent_dentry = NULL;
-	struct path lower_path, saved_path;
+	struct path lower_path;
 	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
 
@@ -74,18 +74,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
 	lower_dentry = lower_path.dentry;
 	lower_parent_dentry = lock_parent(lower_dentry);
 
-	err = mnt_want_write(lower_path.mnt);
-	if (err)
-		goto out_unlock;
-
-	pathcpy(&saved_path, &nd->path);
-	pathcpy(&nd->path, &lower_path);
-
 	/* set last 16bytes of mode field to 0664 */
 	mode = (mode & S_IFMT) | 00664;
-	err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd);
-
-	pathcpy(&nd->path, &saved_path);
+	err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl);
 	if (err)
 		goto out;
 
@@ -93,11 +84,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
 	if (err)
 		goto out;
 	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
-	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
 
 out:
-	mnt_drop_write(lower_path.mnt);
-out_unlock:
 	unlock_dir(lower_parent_dentry);
 	sdcardfs_put_lower_path(dentry, &lower_path);
 	REVERT_CRED(saved_cred);
@@ -118,33 +107,27 @@ static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir,
 
 	OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb));
 
-	file_size_save = i_size_read(old_dentry->d_inode);
+	file_size_save = i_size_read(d_inode(old_dentry));
 	sdcardfs_get_lower_path(old_dentry, &lower_old_path);
 	sdcardfs_get_lower_path(new_dentry, &lower_new_path);
 	lower_old_dentry = lower_old_path.dentry;
 	lower_new_dentry = lower_new_path.dentry;
 	lower_dir_dentry = lock_parent(lower_new_dentry);
 
-	err = mnt_want_write(lower_new_path.mnt);
-	if (err)
-		goto out_unlock;
-
-	err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
-		       lower_new_dentry);
-	if (err || !lower_new_dentry->d_inode)
+	err = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
+		       lower_new_dentry, NULL);
+	if (err || !d_inode(lower_new_dentry))
 		goto out;
 
 	err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path);
 	if (err)
 		goto out;
-	fsstack_copy_attr_times(dir, lower_new_dentry->d_inode);
-	fsstack_copy_inode_size(dir, lower_new_dentry->d_inode);
-	old_dentry->d_inode->i_nlink =
-		  sdcardfs_lower_inode(old_dentry->d_inode)->i_nlink;
-	i_size_write(new_dentry->d_inode, file_size_save);
+	fsstack_copy_attr_times(dir, d_inode(lower_new_dentry));
+	fsstack_copy_inode_size(dir, d_inode(lower_new_dentry));
+	set_nlink(d_inode(old_dentry),
+		  sdcardfs_lower_inode(d_inode(old_dentry))->i_nlink);
+	i_size_write(d_inode(new_dentry), file_size_save);
 out:
-	mnt_drop_write(lower_new_path.mnt);
-out_unlock:
 	unlock_dir(lower_dir_dentry);
 	sdcardfs_put_lower_path(old_dentry, &lower_old_path);
 	sdcardfs_put_lower_path(new_dentry, &lower_new_path);
@@ -180,10 +163,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
 	dget(lower_dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
 
-	err = mnt_want_write(lower_path.mnt);
-	if (err)
-		goto out_unlock;
-	err = vfs_unlink(lower_dir_inode, lower_dentry);
+	err = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
 
 	/*
 	 * Note: unlinking on top of NFS can cause silly-renamed files.
@@ -198,13 +178,11 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
 		goto out;
 	fsstack_copy_attr_times(dir, lower_dir_inode);
 	fsstack_copy_inode_size(dir, lower_dir_inode);
-	dentry->d_inode->i_nlink =
-		  sdcardfs_lower_inode(dentry->d_inode)->i_nlink;
-	dentry->d_inode->i_ctime = dir->i_ctime;
+	set_nlink(d_inode(dentry),
+		  sdcardfs_lower_inode(d_inode(dentry))->i_nlink);
+	d_inode(dentry)->i_ctime = dir->i_ctime;
 	d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */
 out:
-	mnt_drop_write(lower_path.mnt);
-out_unlock:
 	unlock_dir(lower_dir_dentry);
 	dput(lower_dentry);
 	sdcardfs_put_lower_path(dentry, &lower_path);
@@ -217,7 +195,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
 static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry,
 			  const char *symname)
 {
-	int err = 0;
+	int err;
 	struct dentry *lower_dentry;
 	struct dentry *lower_parent_dentry = NULL;
 	struct path lower_path;
@@ -228,21 +206,16 @@ static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry,
 	lower_dentry = lower_path.dentry;
 	lower_parent_dentry = lock_parent(lower_dentry);
 
-	err = mnt_want_write(lower_path.mnt);
-	if (err)
-		goto out_unlock;
-	err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname);
+	err = vfs_symlink(d_inode(lower_parent_dentry), lower_dentry, symname);
 	if (err)
 		goto out;
 	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
 	if (err)
 		goto out;
 	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
-	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
 
 out:
-	mnt_drop_write(lower_path.mnt);
-out_unlock:
 	unlock_dir(lower_parent_dentry);
 	sdcardfs_put_lower_path(dentry, &lower_path);
 	REVERT_CRED();
@@ -266,9 +239,9 @@ static int touch(char *abs_path, mode_t mode) {
 	return 0;
 }
 
-static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	int err = 0;
+	int err;
 	int make_nomedia_in_obb = 0;
 	struct dentry *lower_dentry;
 	struct dentry *lower_parent_dentry = NULL;
@@ -306,13 +279,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	lower_dentry = lower_path.dentry;
 	lower_parent_dentry = lock_parent(lower_dentry);
 
-	err = mnt_want_write(lower_path.mnt);
-	if (err)
-		goto out_unlock;
-
 	/* set last 16bytes of mode field to 0775 */
 	mode = (mode & S_IFMT) | 00775;
-	err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode);
+	err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode);
 
 	if (err)
 		goto out;
@@ -341,9 +310,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		goto out;
 
 	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
-	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
 	/* update number of links on parent directory */
-	dir->i_nlink = sdcardfs_lower_inode(dir)->i_nlink;
+	set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink);
 
 	if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb"))
 		&& (pi->perm == PERM_ANDROID) && (pi->userid == 0))
@@ -388,8 +357,6 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		kfree(nomedia_fullpath);
 	}
 out:
-	mnt_drop_write(lower_path.mnt);
-out_unlock:
 	unlock_dir(lower_parent_dentry);
 	sdcardfs_put_lower_path(dentry, &lower_path);
 out_revert:
@@ -427,23 +394,18 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
 	lower_dentry = lower_path.dentry;
 	lower_dir_dentry = lock_parent(lower_dentry);
 
-	err = mnt_want_write(lower_path.mnt);
-	if (err)
-		goto out_unlock;
-	err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+	err = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
 	if (err)
 		goto out;
 
 	d_drop(dentry);	/* drop our dentry on success (why not VFS's job?) */
-	if (dentry->d_inode)
-		clear_nlink(dentry->d_inode);
-	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
+	if (d_inode(dentry))
+		clear_nlink(d_inode(dentry));
+	fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+	fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+	set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
 
 out:
-	mnt_drop_write(lower_path.mnt);
-out_unlock:
 	unlock_dir(lower_dir_dentry);
 	sdcardfs_put_real_lower(dentry, &lower_path);
 	REVERT_CRED(saved_cred);
@@ -452,10 +414,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
 }
 
 #if 0
-static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 			dev_t dev)
 {
-	int err = 0;
+	int err;
 	struct dentry *lower_dentry;
 	struct dentry *lower_parent_dentry = NULL;
 	struct path lower_path;
@@ -466,10 +428,7 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	lower_dentry = lower_path.dentry;
 	lower_parent_dentry = lock_parent(lower_dentry);
 
-	err = mnt_want_write(lower_path.mnt);
-	if (err)
-		goto out_unlock;
-	err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
+	err = vfs_mknod(d_inode(lower_parent_dentry), lower_dentry, mode, dev);
 	if (err)
 		goto out;
 
@@ -477,11 +436,9 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	if (err)
 		goto out;
 	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
-	fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
 
 out:
-	mnt_drop_write(lower_path.mnt);
-out_unlock:
 	unlock_dir(lower_parent_dentry);
 	sdcardfs_put_lower_path(dentry, &lower_path);
 	REVERT_CRED();
@@ -541,43 +498,33 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto out;
 	}
 
-	err = mnt_want_write(lower_old_path.mnt);
+	err = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+			 d_inode(lower_new_dir_dentry), lower_new_dentry,
+			 NULL, 0);
 	if (err)
 		goto out;
-	err = mnt_want_write(lower_new_path.mnt);
-	if (err)
-		goto out_drop_old_write;
-
-	err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
-			 lower_new_dir_dentry->d_inode, lower_new_dentry);
-	if (err)
-		goto out_err;
 
 	/* Copy attrs from lower dir, but i_uid/i_gid */
-	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
-	fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode);
+	fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry));
+	fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry));
 	fix_derived_permission(new_dir);
 	if (new_dir != old_dir) {
-		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
-		fsstack_copy_inode_size(old_dir, lower_old_dir_dentry->d_inode);
+		fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
+		fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry));
 		fix_derived_permission(old_dir);
 		/* update the derived permission of the old_dentry
 		 * with its new parent
 		 */
 		new_parent = dget_parent(new_dentry);
 		if(new_parent) {
-			if(old_dentry->d_inode) {
+			if(d_inode(old_dentry)) {
 				get_derived_permission(new_parent, old_dentry);
-				fix_derived_permission(old_dentry->d_inode);
+				fix_derived_permission(d_inode(old_dentry));
 			}
 			dput(new_parent);
 		}
 	}
 
-out_err:
-	mnt_drop_write(lower_new_path.mnt);
-out_drop_old_write:
-	mnt_drop_write(lower_old_path.mnt);
 out:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	dput(lower_old_dir_dentry);
@@ -599,17 +546,17 @@ static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz
 
 	sdcardfs_get_lower_path(dentry, &lower_path);
 	lower_dentry = lower_path.dentry;
-	if (!lower_dentry->d_inode->i_op ||
-	    !lower_dentry->d_inode->i_op->readlink) {
+	if (!d_inode(lower_dentry)->i_op ||
+	    !d_inode(lower_dentry)->i_op->readlink) {
 		err = -EINVAL;
 		goto out;
 	}
 
-	err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
+	err = d_inode(lower_dentry)->i_op->readlink(lower_dentry,
 						    buf, bufsiz);
 	if (err < 0)
 		goto out;
-	fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
+	fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry));
 
 out:
 	sdcardfs_put_lower_path(dentry, &lower_path);
@@ -618,7 +565,7 @@ static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz
 #endif
 
 #if 0
-static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
 {
 	char *buf;
 	int len = PAGE_SIZE, err;
@@ -628,7 +575,7 @@ static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	buf = kmalloc(len, GFP_KERNEL);
 	if (!buf) {
 		buf = ERR_PTR(-ENOMEM);
-		goto out;
+		return buf;
 	}
 
 	/* read the symlink, and then we will follow it */
@@ -642,35 +589,19 @@ static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	} else {
 		buf[err] = '\0';
 	}
-out:
-	nd_set_link(nd, buf);
-	return NULL;
+	return *cookie = buf;
 }
 #endif
 
-#if 0
-/* this @nd *IS* still used */
-static void sdcardfs_put_link(struct dentry *dentry, struct nameidata *nd,
-			    void *cookie)
-{
-	char *buf = nd_get_link(nd);
-	if (!IS_ERR(buf))	/* free the char* */
-		kfree(buf);
-}
-#endif
-
-static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags)
+static int sdcardfs_permission(struct inode *inode, int mask)
 {
 	int err;
 
-	if (flags & IPERM_FLAG_RCU)
-		return -ECHILD;
-
 	/*
 	 * Permission check on sdcardfs inode.
 	 * Calling process should have AID_SDCARD_RW permission
 	 */
-	err = generic_permission(inode, mask, 0, inode->i_op->check_acl);
+	err = generic_permission(inode, mask);
 
 	/* XXX
 	 * Original sdcardfs code calls inode_permission(lower_inode,.. )
@@ -700,49 +631,9 @@ static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags
 
 }
 
-static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		 struct kstat *stat)
-{
-	struct dentry *lower_dentry;
-	struct inode *inode;
-	struct inode *lower_inode;
-	struct path lower_path;
-	struct dentry *parent;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
-
-	parent = dget_parent(dentry);
-	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
-						sbi->options.derive, 0, 0)) {
-		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
-						 "  dentry: %s, task:%s\n",
-						 __func__, dentry->d_name.name, current->comm);
-		dput(parent);
-		return -EACCES;
-	}
-	dput(parent);
-
-	inode = dentry->d_inode;
-
-	sdcardfs_get_lower_path(dentry, &lower_path);
-	lower_dentry = lower_path.dentry;
-	lower_inode = sdcardfs_lower_inode(inode);
-
-	fsstack_copy_attr_all(inode, lower_inode);
-	fsstack_copy_inode_size(inode, lower_inode);
-	/* if the dentry has been moved from other location
-	 * so, on this stage, its derived permission must be
-	 * rechecked from its private field.
-	 */
-	fix_derived_permission(inode);
-
-	generic_fillattr(inode, stat);
-	sdcardfs_put_lower_path(dentry, &lower_path);
-	return 0;
-}
-
 static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 {
-	int err = 0;
+	int err;
 	struct dentry *lower_dentry;
 	struct inode *inode;
 	struct inode *lower_inode;
@@ -752,7 +643,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 	struct dentry *parent;
 	int has_rw;
 
-	inode = dentry->d_inode;
+	inode = d_inode(dentry);
 
 	/*
 	 * Check if user has permission to change inode.  We don't check if
@@ -766,7 +657,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 		/* check the Android group ID */
 		has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
 		parent = dget_parent(dentry);
-		if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
+		if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name,
 						sbi->options.derive, 1, has_rw)) {
 			printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 							 "  dentry: %s, task:%s\n",
@@ -819,13 +710,14 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 
 	/* notify the (possibly copied-up) lower inode */
 	/*
-	 * Note: we use lower_dentry->d_inode, because lower_inode may be
+	 * Note: we use d_inode(lower_dentry), because lower_inode may be
 	 * unlinked (no inode->i_sb and i_ino==0.  This happens if someone
 	 * tries to open(), unlink(), then ftruncate() a file.
 	 */
-	mutex_lock(&lower_dentry->d_inode->i_mutex);
-	err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */
-	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	mutex_lock(&d_inode(lower_dentry)->i_mutex);
+	err = notify_change(lower_dentry, &lower_ia, /* note: lower_ia */
+			NULL);
+	mutex_unlock(&d_inode(lower_dentry)->i_mutex);
 	if (current->mm)
 		up_write(&current->mm->mmap_sem);
 	if (err)
@@ -848,6 +740,46 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 	return err;
 }
 
+static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct dentry *lower_dentry;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct path lower_path;
+	struct dentry *parent;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	parent = dget_parent(dentry);
+	if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name,
+						sbi->options.derive, 0, 0)) {
+		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
+						 "  dentry: %s, task:%s\n",
+						 __func__, dentry->d_name.name, current->comm);
+		dput(parent);
+		return -EACCES;
+	}
+	dput(parent);
+
+	inode = d_inode(dentry);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_inode = sdcardfs_lower_inode(inode);
+
+	fsstack_copy_attr_all(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
+	/* If the dentry has been moved here from another location,
+	 * its derived permission must be rechecked at this stage
+	 * from its private field.
+	 */
+	fix_derived_permission(inode);
+
+	generic_fillattr(inode, stat);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return 0;
+}
+
 const struct inode_operations sdcardfs_symlink_iops = {
 	.permission	= sdcardfs_permission,
 	.setattr	= sdcardfs_setattr,
@@ -856,14 +788,16 @@ const struct inode_operations sdcardfs_symlink_iops = {
 	 *     These methods are *NOT* perfectly tested.
 	.readlink	= sdcardfs_readlink,
 	.follow_link	= sdcardfs_follow_link,
-	.put_link	= sdcardfs_put_link,
+	.put_link	= kfree_put_link,
 	 */
 };
 
 const struct inode_operations sdcardfs_dir_iops = {
 	.create		= sdcardfs_create,
 	.lookup		= sdcardfs_lookup,
+#if 0
 	.permission	= sdcardfs_permission,
+#endif
 	.unlink		= sdcardfs_unlink,
 	.mkdir		= sdcardfs_mkdir,
 	.rmdir		= sdcardfs_rmdir,
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index c0b12375b1bf..a4b94df99f32 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c
@@ -79,8 +79,7 @@ static int sdcardfs_inode_set(struct inode *inode, void *lower_inode)
 	return 0;
 }
 
-static struct inode *sdcardfs_iget(struct super_block *sb,
-				 struct inode *lower_inode)
+struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode)
 {
 	struct sdcardfs_inode_info *info;
 	struct inode *inode; /* the new inode to return */
@@ -206,14 +205,13 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
  * Fills in lower_parent_path with <dentry,mnt> on success.
  */
 static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
-		struct nameidata *nd, struct path *lower_parent_path)
+		unsigned int flags, struct path *lower_parent_path)
 {
 	int err = 0;
 	struct vfsmount *lower_dir_mnt;
 	struct dentry *lower_dir_dentry = NULL;
 	struct dentry *lower_dentry;
 	const char *name;
-	struct nameidata lower_nd;
 	struct path lower_path;
 	struct qstr this;
 	struct sdcardfs_sb_info *sbi;
@@ -234,10 +232,10 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
 	/* Use vfs_path_lookup to check if the dentry exists or not */
 	if (sbi->options.lower_fs == LOWER_FS_EXT4) {
 		err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name,
-				LOOKUP_CASE_INSENSITIVE, &lower_nd);
+				LOOKUP_CASE_INSENSITIVE, &lower_path);
 	} else if (sbi->options.lower_fs == LOWER_FS_FAT) {
 		err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0,
-				&lower_nd);
+				&lower_path);
 	}
 
 	/* no error: handle positive dentries */
@@ -253,7 +251,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
 			 * and the base obbpath will be copyed to the lower_path variable.
 			 * if an error returned, there's no change in the lower_path
 			 * 		returns: -ERRNO if error (0: no error) */
-			err = setup_obb_dentry(dentry, &lower_nd.path);
+			err = setup_obb_dentry(dentry, &lower_path);
 
 			if(err) {
 				/* if the sbi->obbpath is not available, we can optionally
@@ -267,8 +265,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
 			}
 		}
 
-		sdcardfs_set_lower_path(dentry, &lower_nd.path);
-		err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_nd.path);
+		sdcardfs_set_lower_path(dentry, &lower_path);
+		err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path);
 		if (err) /* path_put underlying path on error */
 			sdcardfs_put_reset_lower_path(dentry);
 		goto out;
@@ -306,10 +304,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
 	 * the VFS will continue the process of making this negative dentry
 	 * into a positive one.
 	 */
-	if (nd) {
-		if (nd->flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))
-			err = 0;
-	} else
+	if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))
 		err = 0;
 
 out:
@@ -328,7 +323,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
  * @nd : nameidata of parent inode
  */
 struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
-			     struct nameidata *nd)
+			     unsigned int flags)
 {
 	struct dentry *ret = NULL, *parent;
 	struct path lower_parent_path;
@@ -359,7 +354,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 	}
 
-	ret = __sdcardfs_lookup(dentry, nd, &lower_parent_path);
+	ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path);
 	if (IS_ERR(ret))
 	{
 		goto out;
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 1fdceffec72c..9d04ae8ceb46 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -156,6 +156,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 	return 0;
 }
 
+#if 0
 /*
  * our custom d_alloc_root work-alike
  *
@@ -181,6 +182,7 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
 	}
 	return ret;
 }
+#endif
 
 /*
  * There is no need to lock the sdcardfs_super_info's rwsem as there is no
@@ -195,6 +197,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	struct path lower_path;
 	struct sdcardfs_sb_info *sb_info;
 	void *pkgl_id;
+	struct inode *inode;
 
 	printk(KERN_INFO "sdcardfs version 2.0\n");
 
@@ -259,12 +262,18 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	sb->s_magic = SDCARDFS_SUPER_MAGIC;
 	sb->s_op = &sdcardfs_sops;
 
-	/* see comment next to the definition of sdcardfs_d_alloc_root */
-	sb->s_root = sdcardfs_d_alloc_root(sb);
-	if (!sb->s_root) {
-		err = -ENOMEM;
+	/* get a new inode and allocate our root dentry */
+	inode = sdcardfs_iget(sb, lower_path.dentry->d_inode);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_sput;
 	}
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto out_iput;
+	}
+	d_set_d_op(sb->s_root, &sdcardfs_ci_dops);
 
 	/* link the upper and lower dentries */
 	sb->s_root->d_fsdata = NULL;
@@ -275,56 +284,60 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	/* set the lower dentries for s_root */
 	sdcardfs_set_lower_path(sb->s_root, &lower_path);
 
-	/* call interpose to create the upper level inode */
-	err = sdcardfs_interpose(sb->s_root, sb, &lower_path);
-	if (!err) {
-		/* setup permission policy */
-		switch(sb_info->options.derive) {
-			case DERIVE_NONE:
-				setup_derived_state(sb->s_root->d_inode,
+	/*
+	 * No need to call interpose because we already have a positive
+	 * dentry, which was instantiated by d_make_root.  Just need to
+	 * d_rehash it.
+	 */
+	d_rehash(sb->s_root);
+
+	/* setup permission policy */
+	switch(sb_info->options.derive) {
+		case DERIVE_NONE:
+			setup_derived_state(sb->s_root->d_inode,
 					PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775);
-				sb_info->obbpath_s = NULL;
-				break;
-			case DERIVE_LEGACY:
-				/* Legacy behavior used to support internal multiuser layout which
-				 * places user_id at the top directory level, with the actual roots
-				 * just below that. Shared OBB path is also at top level. */
-				setup_derived_state(sb->s_root->d_inode,
-				        PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
-				/* initialize the obbpath string and lookup the path
-				 * sb_info->obb_path will be deactivated by path_put
-				 * on sdcardfs_put_super */
-				sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
-				snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
-				err =  prepare_dir(sb_info->obbpath_s,
-							sb_info->options.fs_low_uid,
-							sb_info->options.fs_low_gid, 00755);
-				if(err)
-					printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n",
-							__func__,__LINE__, sb_info->obbpath_s);
-				break;
-			case DERIVE_UNIFIED:
-				/* Unified multiuser layout which places secondary user_id under
-				 * /Android/user and shared OBB path under /Android/obb. */
-				setup_derived_state(sb->s_root->d_inode,
-						PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
+			sb_info->obbpath_s = NULL;
+			break;
+		case DERIVE_LEGACY:
+			/* Legacy behavior used to support internal multiuser layout which
+			 * places user_id at the top directory level, with the actual roots
+			 * just below that. Shared OBB path is also at top level. */
+			setup_derived_state(sb->s_root->d_inode,
+					PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
+			/* initialize the obbpath string and lookup the path
+			 * sb_info->obb_path will be deactivated by path_put
+			 * on sdcardfs_put_super */
+			sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+			snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
+			err =  prepare_dir(sb_info->obbpath_s,
+					sb_info->options.fs_low_uid,
+					sb_info->options.fs_low_gid, 00755);
+			if(err)
+				printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n",
+						__func__,__LINE__, sb_info->obbpath_s);
+			break;
+		case DERIVE_UNIFIED:
+			/* Unified multiuser layout which places secondary user_id under
+			 * /Android/user and shared OBB path under /Android/obb. */
+			setup_derived_state(sb->s_root->d_inode,
+					PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
 
-				sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
-				snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
-				break;
-		}
-		fix_derived_permission(sb->s_root->d_inode);
-
-		if (!silent)
-			printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n",
-						dev_name, lower_sb->s_type->name);
-		goto out;
+			sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+			snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
+			break;
 	}
-	/* else error: fall through */
+	fix_derived_permission(sb->s_root->d_inode);
 
-	free_dentry_private_data(sb->s_root);
+	if (!silent)
+		printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n",
+				dev_name, lower_sb->s_type->name);
+	goto out; /* all is well */
+
+	/* no longer needed: free_dentry_private_data(sb->s_root); */
 out_freeroot:
 	dput(sb->s_root);
+out_iput:
+	iput(inode);
 out_sput:
 	/* drop refs we took earlier */
 	atomic_dec(&lower_sb->s_active);
@@ -346,7 +359,7 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type,
 
 {
 	int error;
-	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
+	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 
 	if (IS_ERR(s))
 		return ERR_CAST(s);
@@ -378,7 +391,7 @@ static struct file_system_type sdcardfs_fs_type = {
 	.name		= SDCARDFS_NAME,
 	.mount		= sdcardfs_mount,
 	.kill_sb	= generic_shutdown_super,
-	.fs_flags	= FS_REVAL_DOT,
+	.fs_flags	= 0,
 };
 
 static int __init init_sdcardfs_fs(void)
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
index c807d7f18f8b..e21f64675a80 100644
--- a/fs/sdcardfs/mmap.c
+++ b/fs/sdcardfs/mmap.c
@@ -48,9 +48,8 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return err;
 }
 
-static ssize_t sdcardfs_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+static ssize_t sdcardfs_direct_IO(struct kiocb *iocb,
+		struct iov_iter *iter, loff_t pos)
 {
 	/*
      * This function returns zero on purpose in order to support direct IO.
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
index c786d8f92203..d7ba8d4a423e 100644
--- a/fs/sdcardfs/packagelist.c
+++ b/fs/sdcardfs/packagelist.c
@@ -20,7 +20,7 @@
 
 #include "sdcardfs.h"
 #include "strtok.h"
-#include "hashtable.h"
+#include <linux/hashtable.h>
 #include <linux/syscalls.h>
 #include <linux/kthread.h>
 #include <linux/inotify.h>
@@ -29,8 +29,8 @@
 #define STRING_BUF_SIZE		(512)
 
 struct hashtable_entry {
-        struct hlist_node hlist;
-        void *key;
+	struct hlist_node hlist;
+	void *key;
 	int value;
 };
 
@@ -67,12 +67,12 @@ static unsigned int str_hash(void *key) {
 }
 
 static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) {
-        struct hashtable_entry *hash_cur;
-	struct hlist_node *h_n;
+	struct hashtable_entry *hash_cur;
 
-        hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist, (unsigned int)appid, h_n)
-                if (appid == hash_cur->key)
-                        return 1;
+	hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid)
+
+		if (appid == hash_cur->key)
+			return 1;
 	return 0;
 }
 
@@ -87,7 +87,7 @@ int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) {
 		return 1;
 	}
 
-	appid = multiuser_get_app_id(current_fsuid());
+	appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid()));
 	mutex_lock(&pkgl_dat->hashtable_lock);
 	ret = contain_appid_key(pkgl_dat, (void *)appid);
 	mutex_unlock(&pkgl_dat->hashtable_lock);
@@ -98,13 +98,12 @@ appid_t get_appid(void *pkgl_id, const char *app_name)
 {
 	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
 	struct hashtable_entry *hash_cur;
-	struct hlist_node *h_n;
 	unsigned int hash = str_hash((void *)app_name);
 	appid_t ret_id;
 
 	//printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash);
 	mutex_lock(&pkgl_dat->hashtable_lock);
-	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) {
+	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) {
 		//printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key);
 		if (!strcasecmp(app_name, hash_cur->key)) {
 			ret_id = (appid_t)hash_cur->value;
@@ -140,7 +139,7 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name,
 
 	/* Root always has access; access for any other UIDs should always
 	 * be controlled through packages.list. */
-	if (current_fsuid() == 0) {
+	if (from_kuid(&init_user_ns, current_fsuid()) == 0) {
 		return 1;
 	}
 
@@ -148,7 +147,8 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name,
 	 * parent or holds sdcard_rw. */
 	if (w_ok) {
 		if (parent_node &&
-			(current_fsuid() == SDCARDFS_I(parent_node)->d_uid)) {
+			(from_kuid(&init_user_ns, current_fsuid()) ==
+			 SDCARDFS_I(parent_node)->d_uid)) {
 			return 1;
 		}
 		return has_rw;
@@ -174,11 +174,10 @@ int open_flags_to_access_mode(int open_flags) {
 static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) {
 	struct hashtable_entry *hash_cur;
 	struct hashtable_entry *new_entry;
-	struct hlist_node *h_n;
 	unsigned int hash = str_hash(key);
 
 	//printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash);
-	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) {
+	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) {
 		if (!strcasecmp(key, hash_cur->key)) {
 			hash_cur->value = value;
 			return 0;
@@ -202,11 +201,10 @@ static void remove_str_to_int(struct hashtable_entry *h_entry) {
 static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) {
 	struct hashtable_entry *hash_cur;
 	struct hashtable_entry *new_entry;
-	struct hlist_node *h_n;
 
 	//printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value);
 	hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist,
-					(unsigned int)key, h_n) {
+					(unsigned int)key) {
 		if (key == hash_cur->key) {
 			hash_cur->value = value;
 			return 0;
@@ -230,14 +228,13 @@ static void remove_int_to_null(struct hashtable_entry *h_entry) {
 static void remove_all_hashentrys(struct packagelist_data *pkgl_dat)
 {
 	struct hashtable_entry *hash_cur;
-	struct hlist_node *h_n;
 	struct hlist_node *h_t;
 	int i;
 
-	hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist, h_n)
+	hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist)
 		remove_str_to_int(hash_cur);
-	hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist, h_n)
-                remove_int_to_null(hash_cur);
+	hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist)
+		remove_int_to_null(hash_cur);
 
 	hash_init(pkgl_dat->package_to_appid);
 	hash_init(pkgl_dat->appid_with_rw);
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index 90f8b24e4a52..51f6c7912584 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -69,8 +69,8 @@
 
 #define fix_derived_permission(x)	\
 	do {						\
-		(x)->i_uid = SDCARDFS_I(x)->d_uid;	\
-		(x)->i_gid = SDCARDFS_I(x)->d_gid;	\
+		(x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid);	\
+		(x)->i_gid = make_kgid(&init_user_ns, SDCARDFS_I(x)->d_gid);	\
 		(x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\
 	} while (0)
 
@@ -159,7 +159,9 @@ extern void sdcardfs_destroy_dentry_cache(void);
 extern int new_dentry_private_data(struct dentry *dentry);
 extern void free_dentry_private_data(struct dentry *dentry);
 extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
-				    struct nameidata *nd);
+				unsigned int flags);
+extern struct inode *sdcardfs_iget(struct super_block *sb,
+				 struct inode *lower_inode);
 extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
 			    struct path *lower_path);
 
@@ -387,13 +389,13 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path);
 static inline struct dentry *lock_parent(struct dentry *dentry)
 {
 	struct dentry *dir = dget_parent(dentry);
-	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
 	return dir;
 }
 
 static inline void unlock_dir(struct dentry *dir)
 {
-	mutex_unlock(&dir->d_inode->i_mutex);
+	mutex_unlock(&d_inode(dir)->i_mutex);
 	dput(dir);
 }
 
@@ -402,16 +404,9 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m
 	int err;
 	struct dentry *dent;
 	struct iattr attrs;
-	struct nameidata nd;
+	struct path parent;
 
-	err = kern_path_parent(path_s, &nd);
-	if (err) {
-		if (err == -EEXIST)
-			err = 0;
-		goto out;
-	}
-
-	dent = lookup_create(&nd, 1);
+	dent = kern_path_locked(path_s, &parent);
 	if (IS_ERR(dent)) {
 		err = PTR_ERR(dent);
 		if (err == -EEXIST)
@@ -419,29 +414,27 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m
 		goto out_unlock;
 	}
 
-	err = vfs_mkdir(nd.path.dentry->d_inode, dent, mode);
+	err = vfs_mkdir(d_inode(parent.dentry), dent, mode);
 	if (err) {
 		if (err == -EEXIST)
 			err = 0;
 		goto out_dput;
 	}
 
-	attrs.ia_uid = uid;
-	attrs.ia_gid = gid;
+	attrs.ia_uid = make_kuid(&init_user_ns, uid);
+	attrs.ia_gid = make_kgid(&init_user_ns, gid);
 	attrs.ia_valid = ATTR_UID | ATTR_GID;
-	mutex_lock(&dent->d_inode->i_mutex);
-	notify_change(dent, &attrs);
-	mutex_unlock(&dent->d_inode->i_mutex);
+	mutex_lock(&d_inode(dent)->i_mutex);
+	notify_change(dent, &attrs, NULL);
+	mutex_unlock(&d_inode(dent)->i_mutex);
 
 out_dput:
 	dput(dent);
 
 out_unlock:
 	/* parent dentry locked by lookup_create */
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
-
-out:
+	mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+	path_put(&parent);
 	return err;
 }
 
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index 1d206c82dfdf..f153ce1b8cf3 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -122,7 +122,7 @@ static void sdcardfs_evict_inode(struct inode *inode)
 	struct inode *lower_inode;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	/*
 	 * Decrement a reference to a lower_inode, which was incremented
 	 * by our read_inode when it was created initially.
@@ -193,9 +193,9 @@ static void sdcardfs_umount_begin(struct super_block *sb)
 		lower_sb->s_op->umount_begin(lower_sb);
 }
 
-static int sdcardfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int sdcardfs_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(mnt->mnt_sb);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb);
 	struct sdcardfs_mount_options *opts = &sbi->options;
 
 	if (opts->fs_low_uid != 0)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index ef3b4f74eaf0..f2b8acbdb928 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -78,6 +78,8 @@ extern struct dentry *user_path_create(int, const char __user *, struct path *,
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
+extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+		const char *, unsigned int, struct path *);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 

From ef5f9857ff64f07f41bc7fca9b1f402e4638f9a5 Mon Sep 17 00:00:00 2001
From: Daniel Campello <campello@google.com>
Date: Mon, 20 Jul 2015 16:33:46 -0700
Subject: [PATCH 039/797] Changed type-casting in packagelist management

Fixed existing type-casting in packagelist management code. All
warnings at compile time were taken care of.

Change-Id: I1ea97786d1d1325f31b9f09ae966af1f896a2af5
Signed-off-by: Daniel Campello <campello@google.com>
---
 fs/sdcardfs/packagelist.c | 40 ++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
index d7ba8d4a423e..f11591da141d 100644
--- a/fs/sdcardfs/packagelist.c
+++ b/fs/sdcardfs/packagelist.c
@@ -31,7 +31,7 @@
 struct hashtable_entry {
 	struct hlist_node hlist;
 	void *key;
-	int value;
+	unsigned int value;
 };
 
 struct packagelist_data {
@@ -54,7 +54,7 @@ static const char* const kpackageslist_file = "/data/system/packages.list";
 /* Supplementary groups to execute with */
 static const gid_t kgroups[1] = { AID_PACKAGE_INFO };
 
-static unsigned int str_hash(void *key) {
+static unsigned int str_hash(const char *key) {
 	int i;
 	unsigned int h = strlen(key);
 	char *data = (char *)key;
@@ -66,13 +66,13 @@ static unsigned int str_hash(void *key) {
 	return h;
 }
 
-static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) {
+static int contain_appid_key(struct packagelist_data *pkgl_dat, unsigned int appid) {
 	struct hashtable_entry *hash_cur;
 
-	hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid)
-
-		if (appid == hash_cur->key)
+	hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, appid)
+		if ((void *)(uintptr_t)appid == hash_cur->key)
 			return 1;
+
 	return 0;
 }
 
@@ -89,7 +89,7 @@ int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) {
 
 	appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid()));
 	mutex_lock(&pkgl_dat->hashtable_lock);
-	ret = contain_appid_key(pkgl_dat, (void *)appid);
+	ret = contain_appid_key(pkgl_dat, appid);
 	mutex_unlock(&pkgl_dat->hashtable_lock);
 	return ret;
 }
@@ -98,7 +98,7 @@ appid_t get_appid(void *pkgl_id, const char *app_name)
 {
 	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
 	struct hashtable_entry *hash_cur;
-	unsigned int hash = str_hash((void *)app_name);
+	unsigned int hash = str_hash(app_name);
 	appid_t ret_id;
 
 	//printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash);
@@ -171,7 +171,9 @@ int open_flags_to_access_mode(int open_flags) {
 	}
 }
 
-static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) {
+static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key,
+		unsigned int value)
+{
 	struct hashtable_entry *hash_cur;
 	struct hashtable_entry *new_entry;
 	unsigned int hash = str_hash(key);
@@ -198,14 +200,15 @@ static void remove_str_to_int(struct hashtable_entry *h_entry) {
 	kmem_cache_free(hashtable_entry_cachep, h_entry);
 }
 
-static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) {
+static int insert_int_to_null(struct packagelist_data *pkgl_dat, unsigned int key,
+		unsigned int value)
+{
 	struct hashtable_entry *hash_cur;
 	struct hashtable_entry *new_entry;
 
 	//printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value);
-	hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist,
-					(unsigned int)key) {
-		if (key == hash_cur->key) {
+	hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist, key) {
+		if ((void *)(uintptr_t)key == hash_cur->key) {
 			hash_cur->value = value;
 			return 0;
 		}
@@ -213,10 +216,9 @@ static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int
 	new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL);
 	if (!new_entry)
 		return -ENOMEM;
-	new_entry->key = key;
+	new_entry->key = (void *)(uintptr_t)key;
 	new_entry->value = value;
-	hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist,
-			(unsigned int)new_entry->key);
+	hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, key);
 	return 0;
 }
 
@@ -260,7 +262,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) {
 
 	while ((read_amount = sys_read(fd, pkgl_dat->read_buf,
 					sizeof(pkgl_dat->read_buf))) > 0) {
-		int appid;
+		unsigned int appid;
 		char *token;
 		int one_line_len = 0;
 		int additional_read;
@@ -277,7 +279,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) {
 		if (additional_read > 0)
 			sys_lseek(fd, -additional_read, SEEK_CUR);
 
-		if (sscanf(pkgl_dat->read_buf, "%s %d %*d %*s %*s %s",
+		if (sscanf(pkgl_dat->read_buf, "%s %u %*d %*s %*s %s",
 				pkgl_dat->app_name_buf, &appid,
 				pkgl_dat->gids_buf) == 3) {
 			ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid);
@@ -291,7 +293,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) {
 			while (token != NULL) {
 				if (!kstrtoul(token, 10, &ret_gid) &&
 						(ret_gid == pkgl_dat->write_gid)) {
-					ret = insert_int_to_null(pkgl_dat, (void *)appid, 1);
+					ret = insert_int_to_null(pkgl_dat, appid, 1);
 					if (ret) {
 						sys_close(fd);
 						mutex_unlock(&pkgl_dat->hashtable_lock);

From b0f3f87bc1533c64aa1b1a86dd583c92919462ca Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Wed, 3 Feb 2016 21:08:21 -0800
Subject: [PATCH 040/797] sdcardfs: Bring up to date with Android M
 permissions

In M, the workings of sdcardfs were changed significantly.
This brings sdcardfs into line with the changes.

Change-Id: I10e91a84a884c838feef7aa26c0a2b21f02e052e
---
 fs/sdcardfs/Kconfig        |   1 +
 fs/sdcardfs/derived_perm.c | 119 ++++----
 fs/sdcardfs/file.c         |  10 +-
 fs/sdcardfs/inode.c        |  78 +++---
 fs/sdcardfs/lookup.c       |  40 +--
 fs/sdcardfs/main.c         | 141 +++++-----
 fs/sdcardfs/packagelist.c  | 538 ++++++++++++++++++-------------------
 fs/sdcardfs/sdcardfs.h     | 134 ++++++---
 fs/sdcardfs/strtok.h       |  75 ------
 fs/sdcardfs/super.c        |  11 +-
 10 files changed, 518 insertions(+), 629 deletions(-)
 delete mode 100644 fs/sdcardfs/strtok.h

diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
index d995f3eaae6d..ab25f88ebb37 100644
--- a/fs/sdcardfs/Kconfig
+++ b/fs/sdcardfs/Kconfig
@@ -1,5 +1,6 @@
 config SDCARD_FS
 	tristate "sdcard file system"
+	depends on CONFIGFS_FS
 	default n
 	help
 	  Sdcardfs is based on Wrapfs file system.
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
index 00c33a471dcc..128b3e56851f 100644
--- a/fs/sdcardfs/derived_perm.c
+++ b/fs/sdcardfs/derived_perm.c
@@ -29,24 +29,23 @@ static void inherit_derived_state(struct inode *parent, struct inode *child)
 	ci->perm = PERM_INHERIT;
 	ci->userid = pi->userid;
 	ci->d_uid = pi->d_uid;
-	ci->d_gid = pi->d_gid;
-	ci->d_mode = pi->d_mode;
+	ci->under_android = pi->under_android;
 }
 
 /* helper function for derived state */
 void setup_derived_state(struct inode *inode, perm_t perm,
-                        userid_t userid, uid_t uid, gid_t gid, mode_t mode)
+                        userid_t userid, uid_t uid, bool under_android)
 {
 	struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
 
 	info->perm = perm;
 	info->userid = userid;
 	info->d_uid = uid;
-	info->d_gid = gid;
-	info->d_mode = mode;
+	info->under_android = under_android;
 }
 
-void get_derived_permission(struct dentry *parent, struct dentry *dentry)
+/* While renaming, there is a point where we want the path from dentry, but the name from newdentry */
+void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry)
 {
 	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode);
@@ -63,86 +62,68 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry)
 
 	inherit_derived_state(parent->d_inode, dentry->d_inode);
 
-	//printk(KERN_INFO "sdcardfs: derived: %s, %s, %d\n", parent->d_name.name,
-	//				dentry->d_name.name, parent_info->perm);
-
-	if (sbi->options.derive == DERIVE_NONE) {
-		return;
-	}
-
 	/* Derive custom permissions based on parent and current node */
 	switch (parent_info->perm) {
 		case PERM_INHERIT:
 			/* Already inherited above */
 			break;
-		case PERM_LEGACY_PRE_ROOT:
+		case PERM_PRE_ROOT:
 			/* Legacy internal layout places users at top level */
 			info->perm = PERM_ROOT;
-			info->userid = simple_strtoul(dentry->d_name.name, NULL, 10);
+			info->userid = simple_strtoul(newdentry->d_name.name, NULL, 10);
 			break;
 		case PERM_ROOT:
 			/* Assume masked off by default. */
-			info->d_mode = 00770;
-			if (!strcasecmp(dentry->d_name.name, "Android")) {
+			if (!strcasecmp(newdentry->d_name.name, "Android")) {
 				/* App-specific directories inside; let anyone traverse */
 				info->perm = PERM_ANDROID;
-				info->d_mode = 00771;
-			} else if (sbi->options.split_perms) {
-				if (!strcasecmp(dentry->d_name.name, "DCIM")
-					|| !strcasecmp(dentry->d_name.name, "Pictures")) {
-					info->d_gid = AID_SDCARD_PICS;
-				} else if (!strcasecmp(dentry->d_name.name, "Alarms")
-						|| !strcasecmp(dentry->d_name.name, "Movies")
-						|| !strcasecmp(dentry->d_name.name, "Music")
-						|| !strcasecmp(dentry->d_name.name, "Notifications")
-						|| !strcasecmp(dentry->d_name.name, "Podcasts")
-						|| !strcasecmp(dentry->d_name.name, "Ringtones")) {
-					info->d_gid = AID_SDCARD_AV;
-				}
+				info->under_android = true;
 			}
 			break;
 		case PERM_ANDROID:
-			if (!strcasecmp(dentry->d_name.name, "data")) {
+			if (!strcasecmp(newdentry->d_name.name, "data")) {
 				/* App-specific directories inside; let anyone traverse */
 				info->perm = PERM_ANDROID_DATA;
-				info->d_mode = 00771;
-			} else if (!strcasecmp(dentry->d_name.name, "obb")) {
+			} else if (!strcasecmp(newdentry->d_name.name, "obb")) {
 				/* App-specific directories inside; let anyone traverse */
 				info->perm = PERM_ANDROID_OBB;
-				info->d_mode = 00771;
-				// FIXME : this feature will be implemented later.
 				/* Single OBB directory is always shared */
-			} else if (!strcasecmp(dentry->d_name.name, "user")) {
-				/* User directories must only be accessible to system, protected
-				 * by sdcard_all. Zygote will bind mount the appropriate user-
-				 * specific path. */
-				info->perm = PERM_ANDROID_USER;
-				info->d_gid = AID_SDCARD_ALL;
-				info->d_mode = 00770;
+			} else if (!strcasecmp(newdentry->d_name.name, "media")) {
+				/* App-specific directories inside; let anyone traverse */
+				info->perm = PERM_ANDROID_MEDIA;
 			}
 			break;
-		/* same policy will be applied on PERM_ANDROID_DATA
-		 * and PERM_ANDROID_OBB */
 		case PERM_ANDROID_DATA:
 		case PERM_ANDROID_OBB:
-			appid = get_appid(sbi->pkgl_id, dentry->d_name.name);
+		case PERM_ANDROID_MEDIA:
+			appid = get_appid(sbi->pkgl_id, newdentry->d_name.name);
 			if (appid != 0) {
 				info->d_uid = multiuser_get_uid(parent_info->userid, appid);
 			}
-			info->d_mode = 00770;
-			break;
-		case PERM_ANDROID_USER:
-			/* Root of a secondary user */
-			info->perm = PERM_ROOT;
-			info->userid = simple_strtoul(dentry->d_name.name, NULL, 10);
-			info->d_gid = AID_SDCARD_R;
-			info->d_mode = 00771;
 			break;
 	}
 }
 
+void get_derived_permission(struct dentry *parent, struct dentry *dentry)
+{
+	get_derived_permission_new(parent, dentry, dentry); /* non-rename case: dentry is passed as both the path source and the name source */
+}
+
+void get_derive_permissions_recursive(struct dentry *parent) { /* re-derive permissions for every dentry linked below parent, depth-first */
+	struct dentry *dentry;
+	list_for_each_entry(dentry, &parent->d_subdirs, d_child) { /* NOTE(review): no d_lock is taken here - confirm callers serialize access to d_subdirs */
+		if (dentry && dentry->d_inode) { /* only dentries that have an inode are updated */
+			mutex_lock(&dentry->d_inode->i_mutex);
+			get_derived_permission(parent, dentry); /* recompute the child's derived state from parent */
+			fix_derived_permission(dentry->d_inode); /* presumably pushes the derived state into the inode - see macro in sdcardfs.h */
+			get_derive_permissions_recursive(dentry); /* descends while this child's i_mutex is still held */
+			mutex_unlock(&dentry->d_inode->i_mutex);
+		}
+	}
+}
+
 /* main function for updating derived permission */
-inline void update_derived_permission(struct dentry *dentry)
+inline void update_derived_permission_lock(struct dentry *dentry)
 {
 	struct dentry *parent;
 
@@ -154,6 +135,7 @@ inline void update_derived_permission(struct dentry *dentry)
 	 * 1. need to check whether the dentry is updated or not
 	 * 2. remove the root dentry update
 	 */
+	mutex_lock(&dentry->d_inode->i_mutex);
 	if(IS_ROOT(dentry)) {
 		//setup_default_pre_root_state(dentry->d_inode);
 	} else {
@@ -164,6 +146,7 @@ inline void update_derived_permission(struct dentry *dentry)
 		}
 	}
 	fix_derived_permission(dentry->d_inode);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
 int need_graft_path(struct dentry *dentry)
@@ -177,7 +160,7 @@ int need_graft_path(struct dentry *dentry)
 			!strcasecmp(dentry->d_name.name, "obb")) {
 
 		/* /Android/obb is the base obbpath of DERIVED_UNIFIED */
-		if(!(sbi->options.derive == DERIVE_UNIFIED
+		if(!(sbi->options.multiuser == false
 				&& parent_info->userid == 0)) {
 			ret = 1;
 		}
@@ -207,8 +190,7 @@ int is_obbpath_invalid(struct dentry *dent)
 			path_buf = kmalloc(PATH_MAX, GFP_ATOMIC);
 			if(!path_buf) {
 				ret = 1;
-				printk(KERN_ERR "sdcardfs: "
-					"fail to allocate path_buf in %s.\n", __func__);
+				printk(KERN_ERR "sdcardfs: fail to allocate path_buf in %s.\n", __func__);
 			} else {
 				obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX);
 				if (d_unhashed(di->lower_path.dentry) ||
@@ -234,21 +216,16 @@ int is_base_obbpath(struct dentry *dentry)
 	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 
 	spin_lock(&SDCARDFS_D(dentry)->lock);
-	/* DERIVED_LEGACY */
-	if(parent_info->perm == PERM_LEGACY_PRE_ROOT &&
+	if (sbi->options.multiuser) {
+		if(parent_info->perm == PERM_PRE_ROOT &&
+				!strcasecmp(dentry->d_name.name, "obb")) {
+			ret = 1;
+		}
+	} else  if (parent_info->perm == PERM_ANDROID &&
 			!strcasecmp(dentry->d_name.name, "obb")) {
 		ret = 1;
 	}
-	/* DERIVED_UNIFIED :/Android/obb is the base obbpath */
-	else if (parent_info->perm == PERM_ANDROID &&
-			!strcasecmp(dentry->d_name.name, "obb")) {
-		if((sbi->options.derive == DERIVE_UNIFIED
-				&& parent_info->userid == 0)) {
-			ret = 1;
-		}
-	}
 	spin_unlock(&SDCARDFS_D(dentry)->lock);
-	dput(parent);
 	return ret;
 }
 
@@ -272,8 +249,7 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
 
 	if(!err) {
 		/* the obbpath base has been found */
-		printk(KERN_INFO "sdcardfs: "
-				"the sbi->obbpath is found\n");
+		printk(KERN_INFO "sdcardfs: the sbi->obbpath is found\n");
 		pathcpy(lower_path, &obbpath);
 	} else {
 		/* if the sbi->obbpath is not available, we can optionally
@@ -281,8 +257,7 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
 		 * but, the current implementation just returns an error
 		 * because the sdcard daemon also regards this case as
 		 * a lookup fail. */
-		printk(KERN_INFO "sdcardfs: "
-				"the sbi->obbpath is not available\n");
+		printk(KERN_INFO "sdcardfs: the sbi->obbpath is not available\n");
 	}
 	return err;
 }
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
index f9c5eaafc619..c249fa982d3c 100644
--- a/fs/sdcardfs/file.c
+++ b/fs/sdcardfs/file.c
@@ -209,7 +209,6 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
 	struct dentry *parent = dget_parent(dentry);
 	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
-	int has_rw;
 
 	/* don't open unhashed/deleted files */
 	if (d_unhashed(dentry)) {
@@ -217,11 +216,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
 		goto out_err;
 	}
 
-	has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
-
-	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
-				sbi->options.derive,
-				open_flags_to_access_mode(file->f_flags), has_rw)) {
+	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
                          "	dentry: %s, task:%s\n",
 						 __func__, dentry->d_name.name, current->comm);
@@ -257,8 +252,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
 	if (err)
 		kfree(SDCARDFS_F(file));
 	else {
-		fsstack_copy_attr_all(inode, sdcardfs_lower_inode(inode));
-		fix_derived_permission(inode);
+		sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode));
 	}
 
 out_revert_cred:
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 75c622bac2f5..2528da0d3ae1 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -55,11 +55,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
 	struct dentry *lower_dentry;
 	struct dentry *lower_parent_dentry = NULL;
 	struct path lower_path;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
 
-	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
-	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+	if(!check_caller_access_to_name(dir, dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 						 "  dentry: %s, task:%s\n",
 						 __func__, dentry->d_name.name, current->comm);
@@ -80,7 +78,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
 	if (err)
 		goto out;
 
-	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, SDCARDFS_I(dir)->userid);
 	if (err)
 		goto out;
 	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
@@ -143,11 +141,9 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
 	struct inode *lower_dir_inode = sdcardfs_lower_inode(dir);
 	struct dentry *lower_dir_dentry;
 	struct path lower_path;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
 
-	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
-	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+	if(!check_caller_access_to_name(dir, dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 						 "  dentry: %s, task:%s\n",
 						 __func__, dentry->d_name.name, current->comm);
@@ -255,8 +251,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	int fullpath_namelen;
 	int touch_err = 0;
 
-	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
-	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+	if(!check_caller_access_to_name(dir, dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 						 "  dentry: %s, task:%s\n",
 						 __func__, dentry->d_name.name, current->comm);
@@ -293,19 +288,19 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 		if(err) {
 			/* if the sbi->obbpath is not available, the lower_path won't be
 			 * changed by setup_obb_dentry() but the lower path is saved to
-             * its orig_path. this dentry will be revalidated later.
+			 * its orig_path. this dentry will be revalidated later.
 			 * but now, the lower_path should be NULL */
 			sdcardfs_put_reset_lower_path(dentry);
 
 			/* the newly created lower path which saved to its orig_path or
 			 * the lower_path is the base obbpath.
-             * therefore, an additional path_get is required */
+			 * therefore, an additional path_get is required */
 			path_get(&lower_path);
 		} else
 			make_nomedia_in_obb = 1;
 	}
 
-	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path);
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid);
 	if (err)
 		goto out;
 
@@ -314,7 +309,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	/* update number of links on parent directory */
 	set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink);
 
-	if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb"))
+	if ((!sbi->options.multiuser) && (!strcasecmp(dentry->d_name.name, "obb"))
 		&& (pi->perm == PERM_ANDROID) && (pi->userid == 0))
 		make_nomedia_in_obb = 1;
 
@@ -371,12 +366,9 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
 	struct dentry *lower_dir_dentry;
 	int err;
 	struct path lower_path;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
-	//char *path_s = NULL;
 
-	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
-	if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) {
+	if(!check_caller_access_to_name(dir, dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 						 "  dentry: %s, task:%s\n",
 						 __func__, dentry->d_name.name, current->comm);
@@ -461,14 +453,10 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct dentry *trap = NULL;
 	struct dentry *new_parent = NULL;
 	struct path lower_old_path, lower_new_path;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(old_dentry->d_sb);
 	const struct cred *saved_cred = NULL;
 
-	int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
-	if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name,
-			sbi->options.derive, 1, has_rw) ||
-		!check_caller_access_to_name(new_dir, new_dentry->d_name.name,
-			sbi->options.derive, 1, has_rw)) {
+	if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name) ||
+		!check_caller_access_to_name(new_dir, new_dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 						 "  new_dentry: %s, task:%s\n",
 						 __func__, new_dentry->d_name.name, current->comm);
@@ -505,26 +493,31 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto out;
 
 	/* Copy attrs from lower dir, but i_uid/i_gid */
-	fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry));
+	sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry));
 	fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry));
-	fix_derived_permission(new_dir);
+
 	if (new_dir != old_dir) {
-		fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
+		sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry));
 		fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry));
-		fix_derived_permission(old_dir);
+
 		/* update the derived permission of the old_dentry
 		 * with its new parent
 		 */
 		new_parent = dget_parent(new_dentry);
 		if(new_parent) {
 			if(d_inode(old_dentry)) {
-				get_derived_permission(new_parent, old_dentry);
-				fix_derived_permission(d_inode(old_dentry));
+				update_derived_permission_lock(old_dentry);
 			}
 			dput(new_parent);
 		}
 	}
-
+	/* At this point, not all dentry information has been moved, so
+	 * we pass along new_dentry for the name.*/
+	mutex_lock(&d_inode(old_dentry)->i_mutex);
+	get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry);
+	fix_derived_permission(d_inode(old_dentry));
+	get_derive_permissions_recursive(old_dentry);
+	mutex_unlock(&d_inode(old_dentry)->i_mutex);
 out:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	dput(lower_old_dir_dentry);
@@ -639,9 +632,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 	struct inode *lower_inode;
 	struct path lower_path;
 	struct iattr lower_ia;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	struct dentry *parent;
-	int has_rw;
 
 	inode = d_inode(dentry);
 
@@ -655,10 +646,8 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 	/* no vfs_XXX operations required, cred overriding will be skipped. wj*/
 	if (!err) {
 		/* check the Android group ID */
-		has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive);
 		parent = dget_parent(dentry);
-		if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name,
-						sbi->options.derive, 1, has_rw)) {
+		if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) {
 			printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 							 "  dentry: %s, task:%s\n",
 							 __func__, dentry->d_name.name, current->comm);
@@ -723,10 +712,8 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
 	if (err)
 		goto out;
 
-	/* get attributes from the lower inode */
-	fsstack_copy_attr_all(inode, lower_inode);
-	/* update derived permission of the upper inode */
-	fix_derived_permission(inode);
+	/* get attributes from the lower inode and update derived permissions */
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
 
 	/*
 	 * Not running fsstack_copy_inode_size(inode, lower_inode), because
@@ -748,11 +735,9 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct inode *lower_inode;
 	struct path lower_path;
 	struct dentry *parent;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 
 	parent = dget_parent(dentry);
-	if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name,
-						sbi->options.derive, 0, 0)) {
+	if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) {
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
 						 "  dentry: %s, task:%s\n",
 						 __func__, dentry->d_name.name, current->comm);
@@ -767,13 +752,10 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	lower_dentry = lower_path.dentry;
 	lower_inode = sdcardfs_lower_inode(inode);
 
-	fsstack_copy_attr_all(inode, lower_inode);
+
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
 	fsstack_copy_inode_size(inode, lower_inode);
-	/* if the dentry has been moved from other location
-	 * so, on this stage, its derived permission must be
-	 * rechecked from its private field.
-	 */
-	fix_derived_permission(inode);
+
 
 	generic_fillattr(inode, stat);
 	sdcardfs_put_lower_path(dentry, &lower_path);
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index a4b94df99f32..f80abcb6b467 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c
@@ -64,10 +64,17 @@ int new_dentry_private_data(struct dentry *dentry)
 	return 0;
 }
 
-static int sdcardfs_inode_test(struct inode *inode, void *candidate_lower_inode)
+struct inode_data {	/* lookup key that sdcardfs_iget() hands to iget5_locked() */
+	struct inode *lower_inode;	/* lower inode the cached sdcardfs inode must wrap */
+	userid_t id;			/* userid the cached sdcardfs inode must carry */
+};
+
+static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/)
 {
 	struct inode *current_lower_inode = sdcardfs_lower_inode(inode);
-	if (current_lower_inode == (struct inode *)candidate_lower_inode)
+	userid_t current_userid = SDCARDFS_I(inode)->userid;
+	if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode &&
+			current_userid == ((struct inode_data *)candidate_data)->id)
 		return 1; /* found a match */
 	else
 		return 0; /* no match */
@@ -79,12 +86,15 @@ static int sdcardfs_inode_set(struct inode *inode, void *lower_inode)
 	return 0;
 }
 
-struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode)
+struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id)
 {
 	struct sdcardfs_inode_info *info;
+	struct inode_data data;
 	struct inode *inode; /* the new inode to return */
 	int err;
 
+	data.id = id;
+	data.lower_inode = lower_inode;
 	inode = iget5_locked(sb, /* our superblock */
 			     /*
 			      * hashval: we use inode number, but we can
@@ -94,7 +104,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode)
 			     lower_inode->i_ino, /* hashval */
 			     sdcardfs_inode_test,	/* inode comparison function */
 			     sdcardfs_inode_set, /* inode init function */
-			     lower_inode); /* data passed to test+set fxns */
+			     &data); /* data passed to test+set fxns */
 	if (!inode) {
 		err = -EACCES;
 		iput(lower_inode);
@@ -146,11 +156,9 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode)
 				   lower_inode->i_rdev);
 
 	/* all well, copy inode attributes */
-	fsstack_copy_attr_all(inode, lower_inode);
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
 	fsstack_copy_inode_size(inode, lower_inode);
 
-	fix_derived_permission(inode);
-
 	unlock_new_inode(inode);
 	return inode;
 }
@@ -164,7 +172,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode)
  * @lower_path: the lower path (caller does path_get/put)
  */
 int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
-		     struct path *lower_path)
+		     struct path *lower_path, userid_t id)
 {
 	int err = 0;
 	struct inode *inode;
@@ -186,14 +194,14 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
 	 */
 
 	/* inherit lower inode number for sdcardfs's inode */
-	inode = sdcardfs_iget(sb, lower_inode);
+	inode = sdcardfs_iget(sb, lower_inode, id);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out;
 	}
 
 	d_add(dentry, inode);
-	update_derived_permission(dentry);
+	update_derived_permission_lock(dentry);
 out:
 	return err;
 }
@@ -205,7 +213,7 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
  * Fills in lower_parent_path with <dentry,mnt> on success.
  */
 static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
-		unsigned int flags, struct path *lower_parent_path)
+		unsigned int flags, struct path *lower_parent_path, userid_t id)
 {
 	int err = 0;
 	struct vfsmount *lower_dir_mnt;
@@ -266,7 +274,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
 		}
 
 		sdcardfs_set_lower_path(dentry, &lower_path);
-		err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path);
+		err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id);
 		if (err) /* path_put underlying path on error */
 			sdcardfs_put_reset_lower_path(dentry);
 		goto out;
@@ -328,13 +336,11 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct dentry *ret = NULL, *parent;
 	struct path lower_parent_path;
 	int err = 0;
-	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
 	const struct cred *saved_cred = NULL;
 
 	parent = dget_parent(dentry);
 
-	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name,
-						sbi->options.derive, 0, 0)) {
+	if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) {
 		ret = ERR_PTR(-EACCES);
 		printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
                          "	dentry: %s, task:%s\n",
@@ -354,7 +360,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 	}
 
-	ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path);
+	ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid);
 	if (IS_ERR(ret))
 	{
 		goto out;
@@ -365,8 +371,10 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
 		fsstack_copy_attr_times(dentry->d_inode,
 					sdcardfs_lower_inode(dentry->d_inode));
 		/* get drived permission */
+		mutex_lock(&dentry->d_inode->i_mutex);
 		get_derived_permission(parent, dentry);
 		fix_derived_permission(dentry->d_inode);
+		mutex_unlock(&dentry->d_inode->i_mutex);
 	}
 	/* update parent directory's atime */
 	fsstack_copy_attr_atime(parent->d_inode,
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 9d04ae8ceb46..80aa355d801e 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -24,25 +24,27 @@
 #include <linux/parser.h>
 
 enum {
-	Opt_uid,
+	Opt_fsuid,
+	Opt_fsgid,
 	Opt_gid,
-	Opt_wgid,
 	Opt_debug,
-	Opt_split,
-	Opt_derive,
 	Opt_lower_fs,
+	Opt_mask,
+	Opt_multiuser, // May need?
+	Opt_userid,
 	Opt_reserved_mb,
 	Opt_err,
 };
 
 static const match_table_t sdcardfs_tokens = {
-	{Opt_uid, "uid=%u"},
+	{Opt_fsuid, "fsuid=%u"},
+	{Opt_fsgid, "fsgid=%u"},
 	{Opt_gid, "gid=%u"},
-	{Opt_wgid, "wgid=%u"},
 	{Opt_debug, "debug"},
-	{Opt_split, "split"},
-	{Opt_derive, "derive=%s"},
 	{Opt_lower_fs, "lower_fs=%s"},
+	{Opt_mask, "mask=%u"},
+	{Opt_userid, "userid=%d"},
+	{Opt_multiuser, "multiuser"},
 	{Opt_reserved_mb, "reserved_mb=%u"},
 	{Opt_err, NULL}
 };
@@ -58,12 +60,10 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 	/* by default, we use AID_MEDIA_RW as uid, gid */
 	opts->fs_low_uid = AID_MEDIA_RW;
 	opts->fs_low_gid = AID_MEDIA_RW;
-	/* by default, we use AID_SDCARD_RW as write_gid */
-	opts->write_gid = AID_SDCARD_RW;
-	/* default permission policy
-	 * (DERIVE_NONE | DERIVE_LEGACY | DERIVE_UNIFIED) */
-	opts->derive = DERIVE_NONE;
-	opts->split_perms = 0;
+	opts->mask = 0;
+	opts->multiuser = false;
+	opts->fs_user_id = 0;
+	opts->gid = 0;
 	/* by default, we use LOWER_FS_EXT4 as lower fs type */
 	opts->lower_fs = LOWER_FS_EXT4;
 	/* by default, 0MB is reserved */
@@ -85,37 +85,33 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 		case Opt_debug:
 			*debug = 1;
 			break;
-		case Opt_uid:
+		case Opt_fsuid:
 			if (match_int(&args[0], &option))
 				return 0;
 			opts->fs_low_uid = option;
 			break;
-		case Opt_gid:
+		case Opt_fsgid:
 			if (match_int(&args[0], &option))
 				return 0;
 			opts->fs_low_gid = option;
 			break;
-		case Opt_wgid:
+		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			opts->write_gid = option;
+			opts->gid = option;
 			break;
-		case Opt_split:
-			opts->split_perms=1;
+		case Opt_userid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_user_id = option;
 			break;
-		case Opt_derive:
-			string_option = match_strdup(&args[0]);
-			if (!strcmp("none", string_option)) {
-				opts->derive = DERIVE_NONE;
-			} else if (!strcmp("legacy", string_option)) {
-				opts->derive = DERIVE_LEGACY;
-			} else if (!strcmp("unified", string_option)) {
-				opts->derive = DERIVE_UNIFIED;
-			} else {
-				kfree(string_option);
-				goto invalid_option;
-			}
-			kfree(string_option);
+		case Opt_mask:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->mask = option;
+			break;
+		case Opt_multiuser:
+			opts->multiuser = true;
 			break;
 		case Opt_lower_fs:
 			string_option = match_strdup(&args[0]);
@@ -184,6 +180,11 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
 }
 #endif
 
+DEFINE_MUTEX(sdcardfs_super_list_lock);	/* taken around each add/del/walk of sdcardfs_super_list */
+LIST_HEAD(sdcardfs_super_list);		/* struct sdcardfs_sb_info entries, linked through ->list */
+EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list);
+
 /*
  * There is no need to lock the sdcardfs_super_info's rwsem as there is no
  * way anyone can have a reference to the superblock at this point in time.
@@ -196,7 +197,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	struct super_block *lower_sb;
 	struct path lower_path;
 	struct sdcardfs_sb_info *sb_info;
-	void *pkgl_id;
 	struct inode *inode;
 
 	printk(KERN_INFO "sdcardfs version 2.0\n");
@@ -215,8 +215,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
 			&lower_path);
 	if (err) {
-		printk(KERN_ERR	"sdcardfs: error accessing "
-		       "lower directory '%s'\n", dev_name);
+		printk(KERN_ERR	"sdcardfs: error accessing lower directory '%s'\n", dev_name);
 		goto out;
 	}
 
@@ -229,7 +228,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	}
 
 	sb_info = sb->s_fs_info;
-
 	/* parse options */
 	err = parse_options(sb, raw_data, silent, &debug, &sb_info->options);
 	if (err) {
@@ -237,14 +235,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 		goto out_freesbi;
 	}
 
-	if (sb_info->options.derive != DERIVE_NONE) {
-		pkgl_id = packagelist_create(sb_info->options.write_gid);
-		if(IS_ERR(pkgl_id))
-			goto out_freesbi;
-		else
-			sb_info->pkgl_id = pkgl_id;
-	}
-
 	/* set the lower superblock field of upper superblock */
 	lower_sb = lower_path.dentry->d_sb;
 	atomic_inc(&lower_sb->s_active);
@@ -263,7 +253,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	sb->s_op = &sdcardfs_sops;
 
 	/* get a new inode and allocate our root dentry */
-	inode = sdcardfs_iget(sb, lower_path.dentry->d_inode);
+	inode = sdcardfs_iget(sb, lower_path.dentry->d_inode, 0);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_sput;
@@ -292,41 +282,22 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 	d_rehash(sb->s_root);
 
 	/* setup permission policy */
-	switch(sb_info->options.derive) {
-		case DERIVE_NONE:
-			setup_derived_state(sb->s_root->d_inode,
-					PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775);
-			sb_info->obbpath_s = NULL;
-			break;
-		case DERIVE_LEGACY:
-			/* Legacy behavior used to support internal multiuser layout which
-			 * places user_id at the top directory level, with the actual roots
-			 * just below that. Shared OBB path is also at top level. */
-			setup_derived_state(sb->s_root->d_inode,
-					PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
-			/* initialize the obbpath string and lookup the path
-			 * sb_info->obb_path will be deactivated by path_put
-			 * on sdcardfs_put_super */
-			sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
-			snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
-			err =  prepare_dir(sb_info->obbpath_s,
+	sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+	mutex_lock(&sdcardfs_super_list_lock);
+	if(sb_info->options.multiuser) {
+		setup_derived_state(sb->s_root->d_inode, PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false);
+		snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
+		/*err =  prepare_dir(sb_info->obbpath_s,
 					sb_info->options.fs_low_uid,
-					sb_info->options.fs_low_gid, 00755);
-			if(err)
-				printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n",
-						__func__,__LINE__, sb_info->obbpath_s);
-			break;
-		case DERIVE_UNIFIED:
-			/* Unified multiuser layout which places secondary user_id under
-			 * /Android/user and shared OBB path under /Android/obb. */
-			setup_derived_state(sb->s_root->d_inode,
-					PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771);
-
-			sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
-			snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
-			break;
+					sb_info->options.fs_low_gid, 00755);*/
+	} else {
+		setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false);
+		snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
 	}
 	fix_derived_permission(sb->s_root->d_inode);
+	sb_info->sb = sb;
+	list_add(&sb_info->list, &sdcardfs_super_list);
+	mutex_unlock(&sdcardfs_super_list_lock);
 
 	if (!silent)
 		printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n",
@@ -341,7 +312,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
 out_sput:
 	/* drop refs we took earlier */
 	atomic_dec(&lower_sb->s_active);
-	packagelist_destroy(sb_info->pkgl_id);
 out_freesbi:
 	kfree(SDCARDFS_SB(sb));
 	sb->s_fs_info = NULL;
@@ -386,11 +356,22 @@ struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags,
 					raw_data, sdcardfs_read_super);
 }
 
+/* Unlink sb from sdcardfs_super_list before teardown. s_fs_info is NULL once
+ * sdcardfs_read_super() has failed through out_freesbi; skip the unlink then. */
+void sdcardfs_kill_sb(struct super_block *sb) {
+	if (sb->s_magic == SDCARDFS_SUPER_MAGIC && sb->s_fs_info) {
+		mutex_lock(&sdcardfs_super_list_lock);
+		list_del(&SDCARDFS_SB(sb)->list);
+		mutex_unlock(&sdcardfs_super_list_lock);
+	}
+	generic_shutdown_super(sb);
+}
+
 static struct file_system_type sdcardfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= SDCARDFS_NAME,
 	.mount		= sdcardfs_mount,
-	.kill_sb	= generic_shutdown_super,
+	.kill_sb	= sdcardfs_kill_sb,
 	.fs_flags	= 0,
 };
 
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
index f11591da141d..ba3478d94107 100644
--- a/fs/sdcardfs/packagelist.c
+++ b/fs/sdcardfs/packagelist.c
@@ -19,13 +19,16 @@
  */
 
 #include "sdcardfs.h"
-#include "strtok.h"
 #include <linux/hashtable.h>
-#include <linux/syscalls.h>
-#include <linux/kthread.h>
-#include <linux/inotify.h>
 #include <linux/delay.h>
 
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
 #define STRING_BUF_SIZE		(512)
 
 struct hashtable_entry {
@@ -34,25 +37,20 @@ struct hashtable_entry {
 	unsigned int value;
 };
 
-struct packagelist_data {
-	DECLARE_HASHTABLE(package_to_appid,8);
-	DECLARE_HASHTABLE(appid_with_rw,7);
-	struct mutex hashtable_lock;
-	struct task_struct *thread_id;
-	gid_t write_gid;
-	char *strtok_last;
-	char read_buf[STRING_BUF_SIZE];
-	char event_buf[STRING_BUF_SIZE];
-	char app_name_buf[STRING_BUF_SIZE];
-	char gids_buf[STRING_BUF_SIZE];
+struct sb_list {
+	struct super_block *sb;
+	struct list_head list;
 };
 
-static struct kmem_cache *hashtable_entry_cachep;
+struct packagelist_data {
+	DECLARE_HASHTABLE(package_to_appid,8);
+	struct mutex hashtable_lock;
 
-/* Path to system-provided mapping of package name to appIds */
-static const char* const kpackageslist_file = "/data/system/packages.list";
-/* Supplementary groups to execute with */
-static const gid_t kgroups[1] = { AID_PACKAGE_INFO };
+};
+
+static struct packagelist_data *pkgl_data_all;
+
+static struct kmem_cache *hashtable_entry_cachep;
 
 static unsigned int str_hash(const char *key) {
 	int i;
@@ -66,62 +64,29 @@ static unsigned int str_hash(const char *key) {
 	return h;
 }
 
-static int contain_appid_key(struct packagelist_data *pkgl_dat, unsigned int appid) {
-	struct hashtable_entry *hash_cur;
-
-	hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, appid)
-		if ((void *)(uintptr_t)appid == hash_cur->key)
-			return 1;
-
-	return 0;
-}
-
-/* Return if the calling UID holds sdcard_rw. */
-int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) {
-	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
-	appid_t appid;
-	int ret;
-
-	/* No additional permissions enforcement */
-	if (derive == DERIVE_NONE) {
-		return 1;
-	}
-
-	appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid()));
-	mutex_lock(&pkgl_dat->hashtable_lock);
-	ret = contain_appid_key(pkgl_dat, appid);
-	mutex_unlock(&pkgl_dat->hashtable_lock);
-	return ret;
-}
-
 appid_t get_appid(void *pkgl_id, const char *app_name)
 {
-	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
+	struct packagelist_data *pkgl_dat = pkgl_data_all;
 	struct hashtable_entry *hash_cur;
 	unsigned int hash = str_hash(app_name);
 	appid_t ret_id;
 
-	//printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash);
 	mutex_lock(&pkgl_dat->hashtable_lock);
 	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) {
-		//printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key);
 		if (!strcasecmp(app_name, hash_cur->key)) {
 			ret_id = (appid_t)hash_cur->value;
 			mutex_unlock(&pkgl_dat->hashtable_lock);
-			//printk(KERN_INFO "=> app_id: %d\n", (int)ret_id);
 			return ret_id;
 		}
 	}
 	mutex_unlock(&pkgl_dat->hashtable_lock);
-	//printk(KERN_INFO "=> app_id: %d\n", 0);
 	return 0;
 }
 
 /* Kernel has already enforced everything we returned through
  * derive_permissions_locked(), so this is used to lock down access
  * even further, such as enforcing that apps hold sdcard_rw. */
-int check_caller_access_to_name(struct inode *parent_node, const char* name,
-					derive_t derive, int w_ok, int has_rw) {
+int check_caller_access_to_name(struct inode *parent_node, const char* name) {
 
 	/* Always block security-sensitive files at root */
 	if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) {
@@ -132,28 +97,12 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name,
 		}
 	}
 
-	/* No additional permissions enforcement */
-	if (derive == DERIVE_NONE) {
-		return 1;
-	}
-
 	/* Root always has access; access for any other UIDs should always
 	 * be controlled through packages.list. */
 	if (from_kuid(&init_user_ns, current_fsuid()) == 0) {
 		return 1;
 	}
 
-	/* If asking to write, verify that caller either owns the
-	 * parent or holds sdcard_rw. */
-	if (w_ok) {
-		if (parent_node &&
-			(from_kuid(&init_user_ns, current_fsuid()) ==
-			 SDCARDFS_I(parent_node)->d_uid)) {
-			return 1;
-		}
-		return has_rw;
-	}
-
 	/* No extra permissions to enforce */
 	return 1;
 }
@@ -171,14 +120,13 @@ int open_flags_to_access_mode(int open_flags) {
 	}
 }
 
-static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key,
+static int insert_str_to_int_lock(struct packagelist_data *pkgl_dat, char *key,
 		unsigned int value)
 {
 	struct hashtable_entry *hash_cur;
 	struct hashtable_entry *new_entry;
 	unsigned int hash = str_hash(key);
 
-	//printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash);
 	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) {
 		if (!strcasecmp(key, hash_cur->key)) {
 			hash_cur->value = value;
@@ -194,247 +142,277 @@ static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key,
 	return 0;
 }
 
-static void remove_str_to_int(struct hashtable_entry *h_entry) {
-	//printk(KERN_INFO "sdcardfs: %s: %s: %d\n", __func__, (char *)h_entry->key, h_entry->value);
-	kfree(h_entry->key);
-	kmem_cache_free(hashtable_entry_cachep, h_entry);
+static void fixup_perms(struct super_block *sb) {
+	if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) {
+		mutex_lock(&sb->s_root->d_inode->i_mutex);
+		get_derive_permissions_recursive(sb->s_root);
+		mutex_unlock(&sb->s_root->d_inode->i_mutex);
+	}
 }
 
-static int insert_int_to_null(struct packagelist_data *pkgl_dat, unsigned int key,
-		unsigned int value)
-{
-	struct hashtable_entry *hash_cur;
-	struct hashtable_entry *new_entry;
+static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key,
+		unsigned int value) {
+	int ret;
+	struct sdcardfs_sb_info *sbinfo;
+	mutex_lock(&sdcardfs_super_list_lock);
+	mutex_lock(&pkgl_dat->hashtable_lock);
+	ret = insert_str_to_int_lock(pkgl_dat, key, value);
+	mutex_unlock(&pkgl_dat->hashtable_lock);
 
-	//printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value);
-	hash_for_each_possible(pkgl_dat->appid_with_rw,	hash_cur, hlist, key) {
-		if ((void *)(uintptr_t)key == hash_cur->key) {
-			hash_cur->value = value;
-			return 0;
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo) {
+			fixup_perms(sbinfo->sb);
 		}
 	}
-	new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL);
-	if (!new_entry)
-		return -ENOMEM;
-	new_entry->key = (void *)(uintptr_t)key;
-	new_entry->value = value;
-	hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, key);
-	return 0;
+	mutex_unlock(&sdcardfs_super_list_lock);
+	return ret;
 }
 
-static void remove_int_to_null(struct hashtable_entry *h_entry) {
-	//printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)h_entry->key, h_entry->value);
+static void remove_str_to_int_lock(struct hashtable_entry *h_entry) {
+	kfree(h_entry->key);
+	hash_del(&h_entry->hlist);
 	kmem_cache_free(hashtable_entry_cachep, h_entry);
 }
 
+static void remove_str_to_int(struct packagelist_data *pkgl_dat, const char *key)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = str_hash(key);
+	mutex_lock(&sdcardfs_super_list_lock);
+	mutex_lock(&pkgl_dat->hashtable_lock);
+	hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) {
+		if (!strcasecmp(key, hash_cur->key)) {
+			remove_str_to_int_lock(hash_cur);
+			break;
+		}
+	}
+	mutex_unlock(&pkgl_dat->hashtable_lock);
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo) {
+			fixup_perms(sbinfo->sb);
+		}
+	}
+	mutex_unlock(&sdcardfs_super_list_lock);
+	return;
+}
+
 static void remove_all_hashentrys(struct packagelist_data *pkgl_dat)
 {
 	struct hashtable_entry *hash_cur;
 	struct hlist_node *h_t;
 	int i;
-
-	hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist)
-		remove_str_to_int(hash_cur);
-	hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist)
-		remove_int_to_null(hash_cur);
-
-	hash_init(pkgl_dat->package_to_appid);
-	hash_init(pkgl_dat->appid_with_rw);
-}
-
-static int read_package_list(struct packagelist_data *pkgl_dat) {
-	int ret;
-	int fd;
-	int read_amount;
-
-	printk(KERN_INFO "sdcardfs: read_package_list\n");
-
 	mutex_lock(&pkgl_dat->hashtable_lock);
-
-	remove_all_hashentrys(pkgl_dat);
-
-	fd = sys_open(kpackageslist_file, O_RDONLY, 0);
-	if (fd < 0) {
-		printk(KERN_ERR "sdcardfs: failed to open package list\n");
-		mutex_unlock(&pkgl_dat->hashtable_lock);
-		return fd;
-	}
-
-	while ((read_amount = sys_read(fd, pkgl_dat->read_buf,
-					sizeof(pkgl_dat->read_buf))) > 0) {
-		unsigned int appid;
-		char *token;
-		int one_line_len = 0;
-		int additional_read;
-		unsigned long ret_gid;
-
-		while (one_line_len < read_amount) {
-			if (pkgl_dat->read_buf[one_line_len] == '\n') {
-				one_line_len++;
-				break;
-			}
-			one_line_len++;
-		}
-		additional_read = read_amount - one_line_len;
-		if (additional_read > 0)
-			sys_lseek(fd, -additional_read, SEEK_CUR);
-
-		if (sscanf(pkgl_dat->read_buf, "%s %u %*d %*s %*s %s",
-				pkgl_dat->app_name_buf, &appid,
-				pkgl_dat->gids_buf) == 3) {
-			ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid);
-			if (ret) {
-				sys_close(fd);
-				mutex_unlock(&pkgl_dat->hashtable_lock);
-				return ret;
-			}
-
-			token = strtok_r(pkgl_dat->gids_buf, ",", &pkgl_dat->strtok_last);
-			while (token != NULL) {
-				if (!kstrtoul(token, 10, &ret_gid) &&
-						(ret_gid == pkgl_dat->write_gid)) {
-					ret = insert_int_to_null(pkgl_dat, appid, 1);
-					if (ret) {
-						sys_close(fd);
-						mutex_unlock(&pkgl_dat->hashtable_lock);
-						return ret;
-					}
-					break;
-				}
-				token = strtok_r(NULL, ",", &pkgl_dat->strtok_last);
-			}
-		}
-	}
-
-	sys_close(fd);
+	hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist)
+		remove_str_to_int_lock(hash_cur);
 	mutex_unlock(&pkgl_dat->hashtable_lock);
-	return 0;
+	hash_init(pkgl_dat->package_to_appid);
 }
 
-static int packagelist_reader(void *thread_data)
-{
-	struct packagelist_data *pkgl_dat = (struct packagelist_data *)thread_data;
-	struct inotify_event *event;
-	bool active = false;
-	int event_pos;
-	int event_size;
-	int res = 0;
-	int nfd;
-
-	allow_signal(SIGINT);
-
-	nfd = sys_inotify_init();
-	if (nfd < 0) {
-		printk(KERN_ERR "sdcardfs: inotify_init failed: %d\n", nfd);
-		return nfd;
-	}
-
-	while (!kthread_should_stop()) {
-		if (signal_pending(current)) {
-			ssleep(1);
-			continue;
-		}
-
-		if (!active) {
-			res = sys_inotify_add_watch(nfd, kpackageslist_file, IN_DELETE_SELF);
-			if (res < 0) {
-				if (res == -ENOENT || res == -EACCES) {
-				/* Framework may not have created yet, sleep and retry */
-					printk(KERN_ERR "sdcardfs: missing packages.list; retrying\n");
-					ssleep(2);
-					printk(KERN_ERR "sdcardfs: missing packages.list_end; retrying\n");
-					continue;
-				} else {
-					printk(KERN_ERR "sdcardfs: inotify_add_watch failed: %d\n", res);
-					goto interruptable_sleep;
-				}
-			}
-			/* Watch above will tell us about any future changes, so
-			 * read the current state. */
-			res = read_package_list(pkgl_dat);
-			if (res) {
-				printk(KERN_ERR "sdcardfs: read_package_list failed: %d\n", res);
-				goto interruptable_sleep;
-			}
-			active = true;
-		}
-
-		event_pos = 0;
-		res = sys_read(nfd, pkgl_dat->event_buf, sizeof(pkgl_dat->event_buf));
-		if (res < (int) sizeof(*event)) {
-			if (res == -EINTR)
-				continue;
-			printk(KERN_ERR "sdcardfs: failed to read inotify event: %d\n", res);
-			goto interruptable_sleep;
-		}
-
-		while (res >= (int) sizeof(*event)) {
-			event = (struct inotify_event *) (pkgl_dat->event_buf + event_pos);
-
-			printk(KERN_INFO "sdcardfs: inotify event: %08x\n", event->mask);
-			if ((event->mask & IN_IGNORED) == IN_IGNORED) {
-				/* Previously watched file was deleted, probably due to move
-				 * that swapped in new data; re-arm the watch and read. */
-				active = false;
-			}
-
-			event_size = sizeof(*event) + event->len;
-			res -= event_size;
-			event_pos += event_size;
-		}
-		continue;
-
-interruptable_sleep:
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule();
-	}
-	flush_signals(current);
-	sys_close(nfd);
-	return res;
-}
-
-void * packagelist_create(gid_t write_gid)
+static struct packagelist_data * packagelist_create(void)
 {
 	struct packagelist_data *pkgl_dat;
-        struct task_struct *packagelist_thread;
 
 	pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO);
 	if (!pkgl_dat) {
-                printk(KERN_ERR "sdcardfs: creating kthread failed\n");
+                printk(KERN_ERR "sdcardfs: Failed to create hash\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
 	mutex_init(&pkgl_dat->hashtable_lock);
 	hash_init(pkgl_dat->package_to_appid);
-	hash_init(pkgl_dat->appid_with_rw);
-	pkgl_dat->write_gid = write_gid;
 
-        packagelist_thread = kthread_run(packagelist_reader, (void *)pkgl_dat, "pkgld");
-        if (IS_ERR(packagelist_thread)) {
-                printk(KERN_ERR "sdcardfs: creating kthread failed\n");
-		kfree(pkgl_dat);
-		return packagelist_thread;
-        }
-	pkgl_dat->thread_id = packagelist_thread;
-
-	printk(KERN_INFO "sdcardfs: created packagelist pkgld/%d\n",
-				(int)pkgl_dat->thread_id->pid);
-
-	return (void *)pkgl_dat;
+	return pkgl_dat;
 }
 
-void packagelist_destroy(void *pkgl_id)
+static void packagelist_destroy(struct packagelist_data *pkgl_dat)
 {
-	struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id;
-	pid_t pkgl_pid = pkgl_dat->thread_id->pid;
-
-	force_sig_info(SIGINT, SEND_SIG_PRIV, pkgl_dat->thread_id);
-	kthread_stop(pkgl_dat->thread_id);
 	remove_all_hashentrys(pkgl_dat);
-	printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld/%d\n", (int)pkgl_pid);
+	printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n");
 	kfree(pkgl_dat);
 }
 
+struct package_appid {
+	struct config_item item;
+	int add_pid;
+};
+
+static inline struct package_appid *to_package_appid(struct config_item *item)
+{
+	return item ? container_of(item, struct package_appid, item) : NULL;
+}
+
+static ssize_t package_appid_attr_show(struct config_item *item,
+				      char *page)
+{
+	ssize_t count;
+	count = sprintf(page, "%d\n", get_appid(pkgl_data_all, item->ci_name));
+	return count;
+}
+
+static ssize_t package_appid_attr_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	struct package_appid *package_appid = to_package_appid(item);
+	unsigned long tmp;
+	char *p = (char *) page;
+	int ret;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+	ret = insert_str_to_int(pkgl_data_all, item->ci_name, (unsigned int)tmp);
+	package_appid->add_pid = tmp;
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static struct configfs_attribute package_appid_attr_add_pid = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "appid",
+	.ca_mode = S_IRUGO | S_IWUGO,
+	.show = package_appid_attr_show,
+	.store = package_appid_attr_store,
+};
+
+static struct configfs_attribute *package_appid_attrs[] = {
+	&package_appid_attr_add_pid,
+	NULL,
+};
+
+static void package_appid_release(struct config_item *item)
+{
+	printk(KERN_INFO "sdcardfs: removing %s\n", item->ci_dentry->d_name.name);
+	/* item->ci_name is freed already, so we rely on the dentry */
+	remove_str_to_int(pkgl_data_all, item->ci_dentry->d_name.name);
+	kfree(to_package_appid(item));
+}
+
+static struct configfs_item_operations package_appid_item_ops = {
+	.release		= package_appid_release,
+};
+
+static struct config_item_type package_appid_type = {
+	.ct_item_ops	= &package_appid_item_ops,
+	.ct_attrs	= package_appid_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+
+struct sdcardfs_packages {
+	struct config_group group;
+};
+
+static inline struct sdcardfs_packages *to_sdcardfs_packages(struct config_item *item)
+{
+	return item ? container_of(to_config_group(item), struct sdcardfs_packages, group) : NULL;
+}
+
+static struct config_item *sdcardfs_packages_make_item(struct config_group *group, const char *name)
+{
+	struct package_appid *package_appid;
+
+	package_appid = kzalloc(sizeof(struct package_appid), GFP_KERNEL);
+	if (!package_appid)
+		return ERR_PTR(-ENOMEM);
+
+	config_item_init_type_name(&package_appid->item, name,
+				   &package_appid_type);
+
+	package_appid->add_pid = 0;
+
+	return &package_appid->item;
+}
+
+static ssize_t packages_attr_show(struct config_item *item,
+					 char *page)
+{
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_t;
+	int i;
+	int count = 0;
+	mutex_lock(&pkgl_data_all->hashtable_lock);
+	hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist)
+		count += snprintf(page + count, PAGE_SIZE - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value);
+	mutex_unlock(&pkgl_data_all->hashtable_lock);
+
+
+	return count;
+}
+
+static struct configfs_attribute sdcardfs_packages_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "packages_gid.list",
+	.ca_mode = S_IRUGO,
+	.show = packages_attr_show,
+};
+
+static struct configfs_attribute *sdcardfs_packages_attrs[] = {
+	&sdcardfs_packages_attr_description,
+	NULL,
+};
+
+static void sdcardfs_packages_release(struct config_item *item)
+{
+
+	printk(KERN_INFO "sdcardfs: destroyed something?\n");
+	kfree(to_sdcardfs_packages(item));
+}
+
+static struct configfs_item_operations sdcardfs_packages_item_ops = {
+	.release	= sdcardfs_packages_release,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations sdcardfs_packages_group_ops = {
+	.make_item	= sdcardfs_packages_make_item,
+};
+
+static struct config_item_type sdcardfs_packages_type = {
+	.ct_item_ops	= &sdcardfs_packages_item_ops,
+	.ct_group_ops	= &sdcardfs_packages_group_ops,
+	.ct_attrs	= sdcardfs_packages_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem sdcardfs_packages_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "sdcardfs",
+			.ci_type = &sdcardfs_packages_type,
+		},
+	},
+};
+
+static int __init configfs_sdcardfs_init(void)
+{
+	int ret;
+	struct configfs_subsystem *subsys = &sdcardfs_packages_subsys;
+
+	config_group_init(&subsys->su_group);
+	mutex_init(&subsys->su_mutex);
+	ret = configfs_register_subsystem(subsys);
+	if (ret) {
+		printk(KERN_ERR "Error %d while registering subsystem %s\n",
+		       ret,
+		       subsys->su_group.cg_item.ci_namebuf);
+	}
+	return ret;
+}
+
+static void __exit configfs_sdcardfs_exit(void)
+{
+	configfs_unregister_subsystem(&sdcardfs_packages_subsys);
+}
+
 int packagelist_init(void)
 {
 	hashtable_entry_cachep =
@@ -445,13 +423,15 @@ int packagelist_init(void)
 		return -ENOMEM;
 	}
 
+	pkgl_data_all = packagelist_create();
+	configfs_sdcardfs_init();
         return 0;
 }
 
 void packagelist_exit(void)
 {
+	configfs_sdcardfs_exit();
+	packagelist_destroy(pkgl_data_all);
 	if (hashtable_entry_cachep)
 		kmem_cache_destroy(hashtable_entry_cachep);
 }
-
-
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index 51f6c7912584..1b85f4e70324 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -42,6 +42,7 @@
 #include <linux/types.h>
 #include <linux/security.h>
 #include <linux/string.h>
+#include <linux/list.h>
 #include "multiuser.h"
 
 /* the file system name */
@@ -70,10 +71,11 @@
 #define fix_derived_permission(x)	\
 	do {						\
 		(x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid);	\
-		(x)->i_gid = make_kgid(&init_user_ns, SDCARDFS_I(x)->d_gid);	\
-		(x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\
+		(x)->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(x)));	\
+		(x)->i_mode = ((x)->i_mode & S_IFMT) | get_mode(SDCARDFS_I(x));\
 	} while (0)
 
+
 /* OVERRIDE_CRED() and REVERT_CRED()
  * 	OVERRID_CRED()
  * 		backup original task->cred
@@ -99,35 +101,28 @@
 		(int)current->cred->fsuid, 		\
 		(int)current->cred->fsgid);
 
-/* Android 4.4 support */
+/* Android 5.0 support */
 
 /* Permission mode for a specific node. Controls how file permissions
  * are derived for children nodes. */
 typedef enum {
-	/* Nothing special; this node should just inherit from its parent. */
-	PERM_INHERIT,
-	/* This node is one level above a normal root; used for legacy layouts
-	 * which use the first level to represent user_id. */
-	PERM_LEGACY_PRE_ROOT,
-	/* This node is "/" */
-	PERM_ROOT,
-	/* This node is "/Android" */
-	PERM_ANDROID,
-	/* This node is "/Android/data" */
-	PERM_ANDROID_DATA,
-	/* This node is "/Android/obb" */
-	PERM_ANDROID_OBB,
-	/* This node is "/Android/user" */
-	PERM_ANDROID_USER,
+    /* Nothing special; this node should just inherit from its parent. */
+    PERM_INHERIT,
+    /* This node is one level above a normal root; used for legacy layouts
+     * which use the first level to represent user_id. */
+    PERM_PRE_ROOT,
+    /* This node is "/" */
+    PERM_ROOT,
+    /* This node is "/Android" */
+    PERM_ANDROID,
+    /* This node is "/Android/data" */
+    PERM_ANDROID_DATA,
+    /* This node is "/Android/obb" */
+    PERM_ANDROID_OBB,
+    /* This node is "/Android/media" */
+    PERM_ANDROID_MEDIA,
 } perm_t;
 
-/* Permissions structure to derive */
-typedef enum {
-	DERIVE_NONE,
-	DERIVE_LEGACY,
-	DERIVE_UNIFIED,
-} derive_t;
-
 typedef enum {
 	LOWER_FS_EXT4,
 	LOWER_FS_FAT,
@@ -161,9 +156,9 @@ extern void free_dentry_private_data(struct dentry *dentry);
 extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
 				unsigned int flags);
 extern struct inode *sdcardfs_iget(struct super_block *sb,
-				 struct inode *lower_inode);
+				 struct inode *lower_inode, userid_t id);
 extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
-			    struct path *lower_path);
+			    struct path *lower_path, userid_t id);
 
 /* file private data */
 struct sdcardfs_file_info {
@@ -174,18 +169,16 @@ struct sdcardfs_file_info {
 /* sdcardfs inode data in memory */
 struct sdcardfs_inode_info {
 	struct inode *lower_inode;
-	/* state derived based on current position in hierachy
-	 * caution: d_mode does not include file types
-	 */
+	/* state derived based on current position in hierachy */
 	perm_t perm;
 	userid_t userid;
 	uid_t d_uid;
-	gid_t d_gid;
-	mode_t d_mode;
+	bool under_android;
 
 	struct inode vfs_inode;
 };
 
+
 /* sdcardfs dentry data in memory */
 struct sdcardfs_dentry_info {
 	spinlock_t lock;	/* protects lower_path */
@@ -196,15 +189,17 @@ struct sdcardfs_dentry_info {
 struct sdcardfs_mount_options {
 	uid_t fs_low_uid;
 	gid_t fs_low_gid;
-	gid_t write_gid;
-	int split_perms;
-	derive_t derive;
+	userid_t fs_user_id;
+	gid_t gid;
 	lower_fs_t lower_fs;
+	mode_t mask;
+	bool multiuser;
 	unsigned int reserved_mb;
 };
 
 /* sdcardfs super-block data in memory */
 struct sdcardfs_sb_info {
+	struct super_block *sb;
 	struct super_block *lower_sb;
 	/* derived perm policy : some of options have been added
 	 * to sdcardfs_mount_options (Android 4.4 support) */
@@ -213,6 +208,7 @@ struct sdcardfs_sb_info {
 	char *obbpath_s;
 	struct path obbpath;
 	void *pkgl_id;
+	struct list_head list;
 };
 
 /*
@@ -331,6 +327,44 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
 SDCARDFS_DENT_FUNC(lower_path)
 SDCARDFS_DENT_FUNC(orig_path)
 
+static inline int get_gid(struct sdcardfs_inode_info *info) {
+	struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb);
+	if (sb_info->options.gid == AID_SDCARD_RW) {
+		/* As an optimization, certain trusted system components only run
+		 * as owner but operate across all users. Since we're now handing
+		 * out the sdcard_rw GID only to trusted apps, we're okay relaxing
+		 * the user boundary enforcement for the default view. The UIDs
+		 * assigned to app directories are still multiuser aware. */
+		return AID_SDCARD_RW;
+	} else {
+		return multiuser_get_uid(info->userid, sb_info->options.gid);
+	}
+}
+static inline int get_mode(struct sdcardfs_inode_info *info) {
+	int owner_mode;
+	int filtered_mode;
+	struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb);
+	int visible_mode = 0775 & ~sb_info->options.mask;
+
+	if (info->perm == PERM_PRE_ROOT) {
+		/* Top of multi-user view should always be visible to ensure
+		* secondary users can traverse inside. */
+		visible_mode = 0711;
+	} else if (info->under_android) {
+		/* Block "other" access to Android directories, since only apps
+		* belonging to a specific user should be in there; we still
+		* leave +x open for the default view. */
+		if (sb_info->options.gid == AID_SDCARD_RW) {
+			visible_mode = visible_mode & ~0006;
+		} else {
+			visible_mode = visible_mode & ~0007;
+		}
+	}
+	owner_mode = info->lower_inode->i_mode & 0700;
+	filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6));
+	return filtered_mode;
+}
+
 static inline int has_graft_path(const struct dentry *dent)
 {
 	int ret = 0;
@@ -364,22 +398,24 @@ static inline void sdcardfs_put_real_lower(const struct dentry *dent,
 		sdcardfs_put_lower_path(dent, real_lower);
 }
 
+extern struct mutex sdcardfs_super_list_lock;
+extern struct list_head sdcardfs_super_list;
+
 /* for packagelist.c */
-extern int get_caller_has_rw_locked(void *pkgl_id, derive_t derive);
 extern appid_t get_appid(void *pkgl_id, const char *app_name);
-extern int check_caller_access_to_name(struct inode *parent_node, const char* name,
-                                        derive_t derive, int w_ok, int has_rw);
+extern int check_caller_access_to_name(struct inode *parent_node, const char* name);
 extern int open_flags_to_access_mode(int open_flags);
-extern void * packagelist_create(gid_t write_gid);
-extern void packagelist_destroy(void *pkgl_id);
 extern int packagelist_init(void);
 extern void packagelist_exit(void);
 
 /* for derived_perm.c */
 extern void setup_derived_state(struct inode *inode, perm_t perm,
-			userid_t userid, uid_t uid, gid_t gid, mode_t mode);
+			userid_t userid, uid_t uid, bool under_android);
 extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
-extern void update_derived_permission(struct dentry *dentry);
+extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry);
+extern void get_derive_permissions_recursive(struct dentry *parent);
+
+extern void update_derived_permission_lock(struct dentry *dentry);
 extern int need_graft_path(struct dentry *dentry);
 extern int is_base_obbpath(struct dentry *dentry);
 extern int is_obbpath_invalid(struct dentry *dentry);
@@ -483,4 +519,18 @@ static inline int check_min_free_space(struct dentry *dentry, size_t size, int d
 		return 1;
 }
 
+/* Copies attrs and maintains sdcardfs managed attrs */
+static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src)
+{
+	dest->i_mode = (src->i_mode  & S_IFMT) | get_mode(SDCARDFS_I(dest));
+	dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid);
+	dest->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(dest)));
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+	set_nlink(dest, src->i_nlink);
+}
 #endif	/* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/strtok.h b/fs/sdcardfs/strtok.h
deleted file mode 100644
index 50ab25aa0bc4..000000000000
--- a/fs/sdcardfs/strtok.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * fs/sdcardfs/strtok.h
- *
- * Copyright (c) 2013 Samsung Electronics Co. Ltd
- *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
- *               Sunghwan Yun, Sungjong Seo
- *
- * This program has been developed as a stackable file system based on
- * the WrapFS which written by
- *
- * Copyright (c) 1998-2011 Erez Zadok
- * Copyright (c) 2009     Shrikar Archak
- * Copyright (c) 2003-2011 Stony Brook University
- * Copyright (c) 2003-2011 The Research Foundation of SUNY
- *
- * This file is dual licensed.  It may be redistributed and/or modified
- * under the terms of the Apache 2.0 License OR version 2 of the GNU
- * General Public License.
- */
-
-static char *
-strtok_r(char *s, const char *delim, char **last)
-{
-        char *spanp;
-        int c, sc;
-        char *tok;
-
-
-        /* if (s == NULL && (s = *last) == NULL)
-                return NULL;     */
-        if (s == NULL) {
-                s = *last;
-                if (s == NULL)
-                        return NULL;
-        }
-
-        /*
-         * Skip (span) leading delimiters (s += strspn(s, delim), sort of).
-         */
-cont:
-        c = *s++;
-        for (spanp = (char *)delim; (sc = *spanp++) != 0;) {
-                if (c == sc)
-                        goto cont;
-        }
-
-        if (c == 0) {           /* no non-delimiter characters */
-                *last = NULL;
-                return NULL;
-        }
-        tok = s - 1;
-
-        /*
-         * Scan token (scan for delimiters: s += strcspn(s, delim), sort of).
-         * Note that delim must have one NUL; we stop if we see that, too.
-         */
-        for (;;) {
-                c = *s++;
-                spanp = (char *)delim;
-                do {
-                        sc = *spanp++;
-                        if (sc == c) {
-                                if (c == 0)
-                                        s = NULL;
-                                else
-                                        s[-1] = 0;
-                                *last = s;
-                                return tok;
-                        }
-                } while (sc != 0);
-        }
-
-        /* NOTREACHED */
-}
-
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index f153ce1b8cf3..1d6490128c99 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -46,9 +46,6 @@ static void sdcardfs_put_super(struct super_block *sb)
 	sdcardfs_set_lower_super(sb, NULL);
 	atomic_dec(&s->s_active);
 
-	if(spd->pkgl_id)
-		packagelist_destroy(spd->pkgl_id);
-
 	kfree(spd);
 	sb->s_fs_info = NULL;
 }
@@ -203,12 +200,8 @@ static int sdcardfs_show_options(struct seq_file *m, struct dentry *root)
 	if (opts->fs_low_gid != 0)
 		seq_printf(m, ",gid=%u", opts->fs_low_gid);
 
-	if (opts->derive == DERIVE_NONE)
-		seq_printf(m, ",derive=none");
-	else if (opts->derive == DERIVE_LEGACY)
-		seq_printf(m, ",derive=legacy");
-	else if (opts->derive == DERIVE_UNIFIED)
-		seq_printf(m, ",derive=unified");
+	if (opts->multiuser)
+		seq_printf(m, ",multiuser");
 
 	if (opts->reserved_mb != 0)
 		seq_printf(m, ",reserved=%uMB", opts->reserved_mb);

From 735236039d54a5a426e774ee275f9cb2585f30dc Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Thu, 11 Feb 2016 16:44:15 -0800
Subject: [PATCH 041/797] vfs: add d_canonical_path for stacked filesystem
 support

Inotify does not currently know when a filesystem
is acting as a wrapper around another fs. This means
that inotify watchers will miss any modifications to
the base file, as well as any made in a separate
stacked fs that points to the same file.
d_canonical_path solves this problem by allowing the fs
to map a dentry to a path in the lower fs. Inotify
can use it to find the appropriate place to watch to
be informed of all changes to a file.

Change-Id: I09563baffad1711a045e45c1bd0bd8713c2cc0b6
Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/notify/inotify/inotify_user.c | 17 ++++++++++++++---
 include/linux/dcache.h           |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index b8d08d0d0a4d..9a0cd02cf3dc 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -702,6 +702,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	struct fsnotify_group *group;
 	struct inode *inode;
 	struct path path;
+	struct path alteredpath;
+	struct path *canonical_path = &path;
 	struct fd f;
 	int ret;
 	unsigned flags = 0;
@@ -741,13 +743,22 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (ret)
 		goto fput_and_out;
 
+	/* support stacked filesystems */
+	if(path.dentry && path.dentry->d_op) {
+		if (path.dentry->d_op->d_canonical_path) {
+			path.dentry->d_op->d_canonical_path(path.dentry, &alteredpath);
+			canonical_path = &alteredpath;
+			path_put(&path);
+		}
+	}
+
 	/* inode held in place by reference to path; group by fget on fd */
-	inode = path.dentry->d_inode;
+	inode = canonical_path->dentry->d_inode;
 	group = f.file->private_data;
 
 	/* create/update an inode mark */
 	ret = inotify_update_watch(group, inode, mask);
-	path_put(&path);
+	path_put(canonical_path);
 fput_and_out:
 	fdput(f);
 	return ret;
@@ -814,7 +825,7 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
-	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
+	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8a2e009c8a5a..e7041ec48af8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -161,6 +161,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(struct dentry *, bool);
 	struct inode *(*d_select_inode)(struct dentry *, unsigned);
+	void (*d_canonical_path)(const struct dentry *, struct path *);
 } ____cacheline_aligned;
 
 /*

From 815899d23490b00d21bdc4e1d4c9bfff41670270 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Thu, 11 Feb 2016 16:53:36 -0800
Subject: [PATCH 042/797] sdcardfs: Add support for d_canonical_path

Change-Id: I5d6f0e71b8ca99aec4b0894412f1dfd1cfe12add
Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/sdcardfs/dentry.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
index dbbcfd091fc7..ba165ef11e27 100644
--- a/fs/sdcardfs/dentry.c
+++ b/fs/sdcardfs/dentry.c
@@ -177,5 +177,6 @@ const struct dentry_operations sdcardfs_ci_dops = {
 	.d_release	= sdcardfs_d_release,
 	.d_hash 	= sdcardfs_hash_ci,
 	.d_compare	= sdcardfs_cmp_ci,
+	.d_canonical_path = sdcardfs_get_real_lower,
 };
 

From 7f2bcf32a33b8b4e91a0cbf3b3b735fa8e561458 Mon Sep 17 00:00:00 2001
From: Thierry Strudel <tstrudel@google.com>
Date: Wed, 23 Mar 2016 10:02:15 -0700
Subject: [PATCH 043/797] trace: cpufreq: fix typo in min/max cpufreq

Change-Id: Ieed402d3a912b7a318826e101efe2c24b07ebfe4
Signed-off-by: Thierry Strudel <tstrudel@google.com>
---
 include/trace/events/power.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index de77035567c4..9af0d898016a 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -135,7 +135,7 @@ TRACE_EVENT(cpu_frequency_limits,
 
 	TP_fast_assign(
 		__entry->min_freq = min_freq;
-		__entry->max_freq = min_freq;
+		__entry->max_freq = max_freq;
 		__entry->cpu_id = cpu_id;
 	),
 

From 317b853517bb19ee58894f211d962267755a5f0d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Wed, 23 Mar 2016 08:32:23 -0700
Subject: [PATCH 044/797] fs: sdcardfs: Declare LOOKUP_CASE_INSENSITIVE
 unconditionally

Attempts to build sdcardfs as module fail with

fs/sdcardfs/lookup.c: In function '__sdcardfs_lookup':
fs/sdcardfs/lookup.c:243:5: error: 'LOOKUP_CASE_INSENSITIVE' undeclared

This occurs because the define is enclosed with #ifdef
CONFIG_SDCARD_FS_CI_SEARCH. If SDCARD_FS_CI_SEARCH is configured to be
built as a module, only CONFIG_SDCARD_FS_CI_SEARCH_MODULE is defined, so
the #ifdef is false and the define is skipped. Alternatives would be to
use #if IS_ENABLED(CONFIG_SDCARD_FS_CI_SEARCH), or to declare
SDCARD_FS_CI_SEARCH as bool, but that does not work because the define is
used unconditionally in the source.

Note that LOOKUP_CASE_INSENSITIVE is only set but not evaluated in the
current source code, so setting the flag has no real effect.

Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0")
Cc: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 include/linux/namei.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/namei.h b/include/linux/namei.h
index f2b8acbdb928..47b53673ec61 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -43,9 +43,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
-#ifdef CONFIG_SDCARD_FS_CI_SEARCH
 #define LOOKUP_CASE_INSENSITIVE 0x8000
-#endif
 
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 

From ce15174caf1ce927a9255b23438977411082d20d Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Wed, 23 Mar 2016 12:09:25 -0700
Subject: [PATCH 045/797] inotify: Fix erroneous update of bit count

Patch "vfs: add d_canonical_path for stacked filesystem support"
erroneously updated the ALL_INOTIFY_BITS count. This changes it back to 21.

Change-Id: Idb04edc736da276159d30f04c40cff9d6b1e070f
---
 fs/notify/inotify/inotify_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 9a0cd02cf3dc..f72f3b25b3f2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -825,7 +825,7 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
-	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
+	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 

From 1a98a1b1e6af9dbd7a0d4253b13f9d2d83124053 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Wed, 23 Mar 2016 16:39:30 -0700
Subject: [PATCH 046/797] sdcardfs: remove effectless config option

CONFIG_SDCARD_FS_CI_SEARCH only guards a define for
LOOKUP_CASE_INSENSITIVE, which is never used in the
kernel. Remove both, along with the lower_fs mount
option and its parsing, which existed only to select it.

Change-Id: I363a8f31de8ee7a7a934d75300cc9ba8176e2edf
Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/sdcardfs/Kconfig    |  5 -----
 fs/sdcardfs/lookup.c   |  7 +------
 fs/sdcardfs/main.c     | 15 ---------------
 fs/sdcardfs/sdcardfs.h |  6 ------
 include/linux/namei.h  |  1 -
 5 files changed, 1 insertion(+), 33 deletions(-)

diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
index ab25f88ebb37..a1c103316ac7 100644
--- a/fs/sdcardfs/Kconfig
+++ b/fs/sdcardfs/Kconfig
@@ -11,8 +11,3 @@ config SDCARD_FS_FADV_NOACTIVE
 	default y
 	help
 	  Sdcardfs supports fadvise noactive mode.
-
-config SDCARD_FS_CI_SEARCH
-	tristate "sdcardfs case-insensitive search support"
-	depends on SDCARD_FS
-	default y
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index f80abcb6b467..a01b06a514fd 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c
@@ -238,13 +238,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
 	lower_dir_mnt = lower_parent_path->mnt;
 
 	/* Use vfs_path_lookup to check if the dentry exists or not */
-	if (sbi->options.lower_fs == LOWER_FS_EXT4) {
-		err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name,
-				LOOKUP_CASE_INSENSITIVE, &lower_path);
-	} else if (sbi->options.lower_fs == LOWER_FS_FAT) {
-		err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0,
+	err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0,
 				&lower_path);
-	}
 
 	/* no error: handle positive dentries */
 	if (!err) {
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 80aa355d801e..fa11a0458b84 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -41,7 +41,6 @@ static const match_table_t sdcardfs_tokens = {
 	{Opt_fsgid, "fsgid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_debug, "debug"},
-	{Opt_lower_fs, "lower_fs=%s"},
 	{Opt_mask, "mask=%u"},
 	{Opt_userid, "userid=%d"},
 	{Opt_multiuser, "multiuser"},
@@ -64,8 +63,6 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 	opts->multiuser = false;
 	opts->fs_user_id = 0;
 	opts->gid = 0;
-	/* by default, we use LOWER_FS_EXT4 as lower fs type */
-	opts->lower_fs = LOWER_FS_EXT4;
 	/* by default, 0MB is reserved */
 	opts->reserved_mb = 0;
 
@@ -113,18 +110,6 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 		case Opt_multiuser:
 			opts->multiuser = true;
 			break;
-		case Opt_lower_fs:
-			string_option = match_strdup(&args[0]);
-			if (!strcmp("ext4", string_option)) {
-				opts->lower_fs = LOWER_FS_EXT4;
-			} else if (!strcmp("fat", string_option)) {
-				opts->lower_fs = LOWER_FS_FAT;
-			} else {
-				kfree(string_option);
-				goto invalid_option;
-			}
-			kfree(string_option);
-			break;
 		case Opt_reserved_mb:
 			if (match_int(&args[0], &option))
 				return 0;
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index 1b85f4e70324..f111f898b630 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -123,11 +123,6 @@ typedef enum {
     PERM_ANDROID_MEDIA,
 } perm_t;
 
-typedef enum {
-	LOWER_FS_EXT4,
-	LOWER_FS_FAT,
-} lower_fs_t;
-
 struct sdcardfs_sb_info;
 struct sdcardfs_mount_options;
 
@@ -191,7 +186,6 @@ struct sdcardfs_mount_options {
 	gid_t fs_low_gid;
 	userid_t fs_user_id;
 	gid_t gid;
-	lower_fs_t lower_fs;
 	mode_t mask;
 	bool multiuser;
 	unsigned int reserved_mb;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 47b53673ec61..d53c25453aca 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -43,7 +43,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
-#define LOOKUP_CASE_INSENSITIVE 0x8000
 
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 

From d023b7c5eef07aa8273710ff7625e7a512aa90a8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Thu, 24 Mar 2016 10:32:35 -0700
Subject: [PATCH 047/797] fs: Export d_absolute_path

The 0-day build bot reports the following build error, seen if SDCARD_FS
is built as a module.

ERROR: "d_absolute_path" undefined!

Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 fs/dcache.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/dcache.c b/fs/dcache.c
index 877bcbbd03ff..24190e8b7860 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3017,6 +3017,7 @@ char *d_absolute_path(const struct path *path,
 		return ERR_PTR(error);
 	return res;
 }
+EXPORT_SYMBOL(d_absolute_path);
 
 /*
  * same as __d_path but appends "(deleted)" for unlinked files.

From c4fcf3f1c10ce5cfd7ce57197798035ba0436799 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Mon, 28 Mar 2016 15:00:20 -0700
Subject: [PATCH 048/797] sdcardfs: Remove unused code

Change-Id: Ie97cba27ce44818ac56cfe40954f164ad44eccf6
---
 fs/sdcardfs/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index fa11a0458b84..a6522286d731 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -54,7 +54,6 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
-	char *string_option;
 
 	/* by default, we use AID_MEDIA_RW as uid, gid */
 	opts->fs_low_uid = AID_MEDIA_RW;
@@ -117,7 +116,6 @@ static int parse_options(struct super_block *sb, char *options, int silent,
 			break;
 		/* unknown option */
 		default:
-invalid_option:
 			if (!silent) {
 				printk( KERN_ERR "Unrecognized mount option \"%s\" "
 						"or missing value", p);

From 30e5f6e073ced638dc973819d09362002275f476 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Mon, 28 Mar 2016 16:00:34 -0700
Subject: [PATCH 049/797] sdcardfs: remove unneeded __init and __exit

Change-Id: I2a2d45d52f891332174c3000e8681c5167c1564f
---
 fs/sdcardfs/packagelist.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
index ba3478d94107..10f0d6be718b 100644
--- a/fs/sdcardfs/packagelist.c
+++ b/fs/sdcardfs/packagelist.c
@@ -392,7 +392,7 @@ static struct configfs_subsystem sdcardfs_packages_subsys = {
 	},
 };
 
-static int __init configfs_sdcardfs_init(void)
+static int configfs_sdcardfs_init(void)
 {
 	int ret;
 	struct configfs_subsystem *subsys = &sdcardfs_packages_subsys;
@@ -408,7 +408,7 @@ static int __init configfs_sdcardfs_init(void)
 	return ret;
 }
 
-static void __exit configfs_sdcardfs_exit(void)
+static void configfs_sdcardfs_exit(void)
 {
 	configfs_unregister_subsystem(&sdcardfs_packages_subsys);
 }

From a7b7a225c1ba4ea849a74e90b937b64c10c35e1f Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Thu, 24 Mar 2016 10:39:14 -0700
Subject: [PATCH 050/797] mm: Export do_munmap

The 0-day build bot reports the following build error, seen if SDCARD_FS
is built as a module.

ERROR: "do_munmap" undefined!

Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
---
 mm/mmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/mmap.c b/mm/mmap.c
index 6c561acdca92..a089cca8d79a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2634,6 +2634,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 
 	return 0;
 }
+EXPORT_SYMBOL(do_munmap);
 
 int vm_munmap(unsigned long start, size_t len)
 {

From 47733418c908d9139692f286304c9db36fbe1a8d Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 25 Nov 2015 16:03:31 -0500
Subject: [PATCH 051/797] UPSTREAM: dm: don't save and restore bi_private

Device mapper used the field bi_private to point to dm_target_io. However,
since kernel 3.15, the bi_private field is unused, and so the targets do
not need to save and restore this field.

This patch removes code that saves and restores bi_private from dm-cache,
dm-snapshot and dm-verity.

Change-Id: Ic72905ccb6d58ff94eafaa47ba54b2688d92d3d1
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit fe3265b180d6282648f03bc6ac3958c733df01c2)
---
 drivers/md/dm-cache-target.c | 3 ---
 drivers/md/dm-snap.c         | 6 +-----
 drivers/md/dm-verity.c       | 5 +----
 3 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2fd4c8296144..5780accffa30 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -118,14 +118,12 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
  */
 struct dm_hook_info {
 	bio_end_io_t *bi_end_io;
-	void *bi_private;
 };
 
 static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
 			bio_end_io_t *bi_end_io, void *bi_private)
 {
 	h->bi_end_io = bio->bi_end_io;
-	h->bi_private = bio->bi_private;
 
 	bio->bi_end_io = bi_end_io;
 	bio->bi_private = bi_private;
@@ -134,7 +132,6 @@ static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
 static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
 {
 	bio->bi_end_io = h->bi_end_io;
-	bio->bi_private = h->bi_private;
 }
 
 /*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 61f184ad081c..3766386080a4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -207,7 +207,6 @@ struct dm_snap_pending_exception {
 	 */
 	struct bio *full_bio;
 	bio_end_io_t *full_bio_end_io;
-	void *full_bio_private;
 };
 
 /*
@@ -1486,10 +1485,8 @@ static void pending_complete(void *context, int success)
 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
 	origin_bios = bio_list_get(&pe->origin_bios);
 	full_bio = pe->full_bio;
-	if (full_bio) {
+	if (full_bio)
 		full_bio->bi_end_io = pe->full_bio_end_io;
-		full_bio->bi_private = pe->full_bio_private;
-	}
 	increment_pending_exceptions_done_count();
 
 	up_write(&s->lock);
@@ -1595,7 +1592,6 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
 
 	pe->full_bio = bio;
 	pe->full_bio_end_io = bio->bi_end_io;
-	pe->full_bio_private = bio->bi_private;
 
 	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
 						   copy_callback, pe);
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index ccf41886ebcf..9e8891507c1c 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -83,9 +83,8 @@ struct dm_verity {
 struct dm_verity_io {
 	struct dm_verity *v;
 
-	/* original values of bio->bi_end_io and bio->bi_private */
+	/* original value of bio->bi_end_io */
 	bio_end_io_t *orig_bi_end_io;
-	void *orig_bi_private;
 
 	sector_t block;
 	unsigned n_blocks;
@@ -453,7 +452,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
 	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
 
 	bio->bi_end_io = io->orig_bi_end_io;
-	bio->bi_private = io->orig_bi_private;
 	bio->bi_error = error;
 
 	bio_endio(bio);
@@ -566,7 +564,6 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	io = dm_per_bio_data(bio, ti->per_bio_data_size);
 	io->v = v;
 	io->orig_bi_end_io = bio->bi_end_io;
-	io->orig_bi_private = bio->bi_private;
 	io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
 	io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
 

From 00e28c2ab88ad154a38a4b8233f94b4e1a4e35f0 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 5 Nov 2015 02:02:31 +0000
Subject: [PATCH 052/797] UPSTREAM: dm verity: clean up duplicate hashing code

Handle dm-verity salting in one place to simplify the code.

Change-Id: If923a01dc63ae5123af13ba1b0863b73e33ddf46
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit 6dbeda3469ced777bc3138ed5918f7ae79670b7b)
---
 drivers/md/dm-verity.c | 266 +++++++++++++++++++++++------------------
 1 file changed, 149 insertions(+), 117 deletions(-)

diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 9e8891507c1c..24517055bd8e 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -172,6 +172,84 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
 	return block >> (level * v->hash_per_block_bits);
 }
 
+/*
+ * Wrapper for crypto_shash_init, which handles verity salting.
+ */
+static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
+{
+	int r;
+
+	desc->tfm = v->tfm;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	r = crypto_shash_init(desc);
+
+	if (unlikely(r < 0)) {
+		DMERR("crypto_shash_init failed: %d", r);
+		return r;
+	}
+
+	if (likely(v->version >= 1)) {
+		r = crypto_shash_update(desc, v->salt, v->salt_size);
+
+		if (unlikely(r < 0)) {
+			DMERR("crypto_shash_update failed: %d", r);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
+			      const u8 *data, size_t len)
+{
+	int r = crypto_shash_update(desc, data, len);
+
+	if (unlikely(r < 0))
+		DMERR("crypto_shash_update failed: %d", r);
+
+	return r;
+}
+
+static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
+			     u8 *digest)
+{
+	int r;
+
+	if (unlikely(!v->version)) {
+		r = crypto_shash_update(desc, v->salt, v->salt_size);
+
+		if (r < 0) {
+			DMERR("crypto_shash_update failed: %d", r);
+			return r;
+		}
+	}
+
+	r = crypto_shash_final(desc, digest);
+
+	if (unlikely(r < 0))
+		DMERR("crypto_shash_final failed: %d", r);
+
+	return r;
+}
+
+static int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+		       const u8 *data, size_t len, u8 *digest)
+{
+	int r;
+
+	r = verity_hash_init(v, desc);
+	if (unlikely(r < 0))
+		return r;
+
+	r = verity_hash_update(v, desc, data, len);
+	if (unlikely(r < 0))
+		return r;
+
+	return verity_hash_final(v, desc, digest);
+}
+
 static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
 				 sector_t *hash_block, unsigned *offset)
 {
@@ -252,10 +330,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
  * If "skip_unverified" is false, unverified buffer is hashed and verified
  * against current value of io_want_digest(v, io).
  */
-static int verity_verify_level(struct dm_verity_io *io, sector_t block,
-			       int level, bool skip_unverified)
+static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
+			       sector_t block, int level, bool skip_unverified,
+			       u8 *want_digest)
 {
-	struct dm_verity *v = io->v;
 	struct dm_buffer *buf;
 	struct buffer_aux *aux;
 	u8 *data;
@@ -272,74 +350,71 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
 	aux = dm_bufio_get_aux_data(buf);
 
 	if (!aux->hash_verified) {
-		struct shash_desc *desc;
-		u8 *result;
-
 		if (skip_unverified) {
 			r = 1;
 			goto release_ret_r;
 		}
 
-		desc = io_hash_desc(v, io);
-		desc->tfm = v->tfm;
-		desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-		r = crypto_shash_init(desc);
-		if (r < 0) {
-			DMERR("crypto_shash_init failed: %d", r);
+		r = verity_hash(v, io_hash_desc(v, io),
+				data, 1 << v->hash_dev_block_bits,
+				io_real_digest(v, io));
+		if (unlikely(r < 0))
 			goto release_ret_r;
-		}
 
-		if (likely(v->version >= 1)) {
-			r = crypto_shash_update(desc, v->salt, v->salt_size);
-			if (r < 0) {
-				DMERR("crypto_shash_update failed: %d", r);
-				goto release_ret_r;
-			}
-		}
-
-		r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
-		if (r < 0) {
-			DMERR("crypto_shash_update failed: %d", r);
-			goto release_ret_r;
-		}
-
-		if (!v->version) {
-			r = crypto_shash_update(desc, v->salt, v->salt_size);
-			if (r < 0) {
-				DMERR("crypto_shash_update failed: %d", r);
-				goto release_ret_r;
-			}
-		}
-
-		result = io_real_digest(v, io);
-		r = crypto_shash_final(desc, result);
-		if (r < 0) {
-			DMERR("crypto_shash_final failed: %d", r);
-			goto release_ret_r;
-		}
-		if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
-			if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
-					      hash_block)) {
-				r = -EIO;
-				goto release_ret_r;
-			}
-		} else
+		if (likely(memcmp(io_real_digest(v, io), want_digest,
+				  v->digest_size) == 0))
 			aux->hash_verified = 1;
+		else if (verity_handle_err(v,
+					   DM_VERITY_BLOCK_TYPE_METADATA,
+					   hash_block)) {
+			r = -EIO;
+			goto release_ret_r;
+		}
 	}
 
 	data += offset;
-
-	memcpy(io_want_digest(v, io), data, v->digest_size);
-
-	dm_bufio_release(buf);
-	return 0;
+	memcpy(want_digest, data, v->digest_size);
+	r = 0;
 
 release_ret_r:
 	dm_bufio_release(buf);
-
 	return r;
 }
 
+/*
+ * Find a hash for a given block, write it to digest and verify the integrity
+ * of the hash tree if necessary.
+ */
+static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+				 sector_t block, u8 *digest)
+{
+	int i;
+	int r;
+
+	if (likely(v->levels)) {
+		/*
+		 * First, we try to get the requested hash for
+		 * the current block. If the hash block itself is
+		 * verified, zero is returned. If it isn't, this
+		 * function returns 1 and we fall back to whole
+		 * chain verification.
+		 */
+		r = verity_verify_level(v, io, block, 0, true, digest);
+		if (likely(r <= 0))
+			return r;
+	}
+
+	memcpy(digest, v->root_digest, v->digest_size);
+
+	for (i = v->levels - 1; i >= 0; i--) {
+		r = verity_verify_level(v, io, block, i, false, digest);
+		if (unlikely(r))
+			return r;
+	}
+
+	return 0;
+}
+
 /*
  * Verify one "dm_verity_io" structure.
  */
@@ -349,54 +424,21 @@ static int verity_verify_io(struct dm_verity_io *io)
 	struct bio *bio = dm_bio_from_per_bio_data(io,
 						   v->ti->per_bio_data_size);
 	unsigned b;
-	int i;
 
 	for (b = 0; b < io->n_blocks; b++) {
-		struct shash_desc *desc;
-		u8 *result;
 		int r;
 		unsigned todo;
+		struct shash_desc *desc = io_hash_desc(v, io);
 
-		if (likely(v->levels)) {
-			/*
-			 * First, we try to get the requested hash for
-			 * the current block. If the hash block itself is
-			 * verified, zero is returned. If it isn't, this
-			 * function returns 0 and we fall back to whole
-			 * chain verification.
-			 */
-			int r = verity_verify_level(io, io->block + b, 0, true);
-			if (likely(!r))
-				goto test_block_hash;
-			if (r < 0)
-				return r;
-		}
-
-		memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
-
-		for (i = v->levels - 1; i >= 0; i--) {
-			int r = verity_verify_level(io, io->block + b, i, false);
-			if (unlikely(r))
-				return r;
-		}
-
-test_block_hash:
-		desc = io_hash_desc(v, io);
-		desc->tfm = v->tfm;
-		desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-		r = crypto_shash_init(desc);
-		if (r < 0) {
-			DMERR("crypto_shash_init failed: %d", r);
+		r = verity_hash_for_block(v, io, io->block + b,
+					  io_want_digest(v, io));
+		if (unlikely(r < 0))
+			return r;
+
+		r = verity_hash_init(v, desc);
+		if (unlikely(r < 0))
 			return r;
-		}
 
-		if (likely(v->version >= 1)) {
-			r = crypto_shash_update(desc, v->salt, v->salt_size);
-			if (r < 0) {
-				DMERR("crypto_shash_update failed: %d", r);
-				return r;
-			}
-		}
 		todo = 1 << v->data_dev_block_bits;
 		do {
 			u8 *page;
@@ -407,37 +449,27 @@ static int verity_verify_io(struct dm_verity_io *io)
 			len = bv.bv_len;
 			if (likely(len >= todo))
 				len = todo;
-			r = crypto_shash_update(desc, page + bv.bv_offset, len);
+			r = verity_hash_update(v, desc,  page + bv.bv_offset,
+					       len);
 			kunmap_atomic(page);
 
-			if (r < 0) {
-				DMERR("crypto_shash_update failed: %d", r);
+			if (unlikely(r < 0))
 				return r;
-			}
 
 			bio_advance_iter(bio, &io->iter, len);
 			todo -= len;
 		} while (todo);
 
-		if (!v->version) {
-			r = crypto_shash_update(desc, v->salt, v->salt_size);
-			if (r < 0) {
-				DMERR("crypto_shash_update failed: %d", r);
-				return r;
-			}
-		}
-
-		result = io_real_digest(v, io);
-		r = crypto_shash_final(desc, result);
-		if (r < 0) {
-			DMERR("crypto_shash_final failed: %d", r);
+		r = verity_hash_final(v, desc, io_real_digest(v, io));
+		if (unlikely(r < 0))
 			return r;
-		}
-		if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
-			if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
-					      io->block + b))
-				return -EIO;
-		}
+
+		if (likely(memcmp(io_real_digest(v, io),
+				io_want_digest(v, io), v->digest_size) == 0))
+			continue;
+		else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
+				io->block + b))
+			return -EIO;
 	}
 
 	return 0;

From e6d1b9a713f90c87a5a2b2393c42ef13c392e047 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 5 Nov 2015 02:02:32 +0000
Subject: [PATCH 053/797] UPSTREAM: dm verity: separate function for parsing
 opt args

Move optional argument parsing into a separate function to make it
easier to add more of them without making verity_ctr even longer.

Change-Id: I9cd9df41c3326824f8cca5764075501987e78a52
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit 753c1fd02807cb43a1c5d01d75d454054d46bdad)
---
 drivers/md/dm-verity.c | 71 +++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 28 deletions(-)

diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 24517055bd8e..b0a53c3b926d 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -34,6 +34,8 @@
 #define DM_VERITY_OPT_LOGGING		"ignore_corruption"
 #define DM_VERITY_OPT_RESTART		"restart_on_corruption"
 
+#define DM_VERITY_OPTS_MAX		1
+
 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
 
 module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
@@ -721,6 +723,44 @@ static void verity_dtr(struct dm_target *ti)
 	kfree(v);
 }
 
+static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
+{
+	int r;
+	unsigned argc;
+	struct dm_target *ti = v->ti;
+	const char *arg_name;
+
+	static struct dm_arg _args[] = {
+		{0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
+	};
+
+	r = dm_read_arg_group(_args, as, &argc, &ti->error);
+	if (r)
+		return -EINVAL;
+
+	if (!argc)
+		return 0;
+
+	do {
+		arg_name = dm_shift_arg(as);
+		argc--;
+
+		if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) {
+			v->mode = DM_VERITY_MODE_LOGGING;
+			continue;
+
+		} else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
+			v->mode = DM_VERITY_MODE_RESTART;
+			continue;
+		}
+
+		ti->error = "Unrecognized verity feature request";
+		return -EINVAL;
+	} while (argc && !r);
+
+	return r;
+}
+
 /*
  * Target parameters:
  *	<version>	The current format is version 1.
@@ -739,18 +779,13 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 {
 	struct dm_verity *v;
 	struct dm_arg_set as;
-	const char *opt_string;
-	unsigned int num, opt_params;
+	unsigned int num;
 	unsigned long long num_ll;
 	int r;
 	int i;
 	sector_t hash_position;
 	char dummy;
 
-	static struct dm_arg _args[] = {
-		{0, 1, "Invalid number of feature args"},
-	};
-
 	v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
 	if (!v) {
 		ti->error = "Cannot allocate verity structure";
@@ -895,29 +930,9 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		as.argc = argc;
 		as.argv = argv;
 
-		r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
-		if (r)
+		r = verity_parse_opt_args(&as, v);
+		if (r < 0)
 			goto bad;
-
-		while (opt_params) {
-			opt_params--;
-			opt_string = dm_shift_arg(&as);
-			if (!opt_string) {
-				ti->error = "Not enough feature arguments";
-				r = -EINVAL;
-				goto bad;
-			}
-
-			if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING))
-				v->mode = DM_VERITY_MODE_LOGGING;
-			else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART))
-				v->mode = DM_VERITY_MODE_RESTART;
-			else {
-				ti->error = "Invalid feature arguments";
-				r = -EINVAL;
-				goto bad;
-			}
-		}
 	}
 
 	v->hash_per_block_bits =

From 612ce5278f2070c7d5d85b7e82b17e0231d5ede7 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 3 Dec 2015 15:36:00 -0500
Subject: [PATCH 054/797] UPSTREAM: dm verity: move dm-verity.c to
 dm-verity-target.c

Prepare for extending dm-verity with an optional object.  Follows the
naming convention used by other DM targets (e.g. dm-cache and dm-era).

Change-Id: If6d2f27b290adf14fa77f3745fdc13aaa417c8dc
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit 03045cbafa2d663ad8d0a583ac219d202d824344)
---
 drivers/md/Makefile                            | 1 +
 drivers/md/{dm-verity.c => dm-verity-target.c} | 0
 2 files changed, 1 insertion(+)
 rename drivers/md/{dm-verity.c => dm-verity-target.c} (100%)

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index f34979cd141a..94e9f6bb33d1 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -16,6 +16,7 @@ dm-cache-mq-y   += dm-cache-policy-mq.o
 dm-cache-smq-y   += dm-cache-policy-smq.o
 dm-cache-cleaner-y += dm-cache-policy-cleaner.o
 dm-era-y	+= dm-era-target.o
+dm-verity-y	+= dm-verity-target.o
 md-mod-y	+= md.o bitmap.o
 raid456-y	+= raid5.o raid5-cache.o
 
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity-target.c
similarity index 100%
rename from drivers/md/dm-verity.c
rename to drivers/md/dm-verity-target.c

From 0be563d6136d94c126ecc5bb352dfa6088d6d8dd Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 3 Dec 2015 16:01:51 -0500
Subject: [PATCH 055/797] UPSTREAM: dm verity: factor out structures and
 functions useful to separate object

Prepare for an optional verity object to make use of existing dm-verity
structures and functions.

Change-Id: Ib14c3834bfed222b33e068908fb5f71a53e1187b
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit ffa393807cd69656d5b6bc9d9622e205071cbab8)
---
 drivers/md/dm-verity-target.c | 116 +++++-----------------------------
 drivers/md/dm-verity.h        | 112 ++++++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 100 deletions(-)
 create mode 100644 drivers/md/dm-verity.h

diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index b0a53c3b926d..7e200ba631fb 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -14,12 +14,10 @@
  * access behavior.
  */
 
-#include "dm-bufio.h"
+#include "dm-verity.h"
 
 #include <linux/module.h>
-#include <linux/device-mapper.h>
 #include <linux/reboot.h>
-#include <crypto/hash.h>
 
 #define DM_MSG_PREFIX			"verity"
 
@@ -28,7 +26,6 @@
 
 #define DM_VERITY_DEFAULT_PREFETCH_SIZE	262144
 
-#define DM_VERITY_MAX_LEVELS		63
 #define DM_VERITY_MAX_CORRUPTED_ERRS	100
 
 #define DM_VERITY_OPT_LOGGING		"ignore_corruption"
@@ -40,72 +37,6 @@ static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
 
 module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
 
-enum verity_mode {
-	DM_VERITY_MODE_EIO,
-	DM_VERITY_MODE_LOGGING,
-	DM_VERITY_MODE_RESTART
-};
-
-enum verity_block_type {
-	DM_VERITY_BLOCK_TYPE_DATA,
-	DM_VERITY_BLOCK_TYPE_METADATA
-};
-
-struct dm_verity {
-	struct dm_dev *data_dev;
-	struct dm_dev *hash_dev;
-	struct dm_target *ti;
-	struct dm_bufio_client *bufio;
-	char *alg_name;
-	struct crypto_shash *tfm;
-	u8 *root_digest;	/* digest of the root block */
-	u8 *salt;		/* salt: its size is salt_size */
-	unsigned salt_size;
-	sector_t data_start;	/* data offset in 512-byte sectors */
-	sector_t hash_start;	/* hash start in blocks */
-	sector_t data_blocks;	/* the number of data blocks */
-	sector_t hash_blocks;	/* the number of hash blocks */
-	unsigned char data_dev_block_bits;	/* log2(data blocksize) */
-	unsigned char hash_dev_block_bits;	/* log2(hash blocksize) */
-	unsigned char hash_per_block_bits;	/* log2(hashes in hash block) */
-	unsigned char levels;	/* the number of tree levels */
-	unsigned char version;
-	unsigned digest_size;	/* digest size for the current hash algorithm */
-	unsigned shash_descsize;/* the size of temporary space for crypto */
-	int hash_failed;	/* set to 1 if hash of any block failed */
-	enum verity_mode mode;	/* mode for handling verification errors */
-	unsigned corrupted_errs;/* Number of errors for corrupted blocks */
-
-	struct workqueue_struct *verify_wq;
-
-	/* starting blocks for each tree level. 0 is the lowest level. */
-	sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
-};
-
-struct dm_verity_io {
-	struct dm_verity *v;
-
-	/* original value of bio->bi_end_io */
-	bio_end_io_t *orig_bi_end_io;
-
-	sector_t block;
-	unsigned n_blocks;
-
-	struct bvec_iter iter;
-
-	struct work_struct work;
-
-	/*
-	 * Three variably-size fields follow this struct:
-	 *
-	 * u8 hash_desc[v->shash_descsize];
-	 * u8 real_digest[v->digest_size];
-	 * u8 want_digest[v->digest_size];
-	 *
-	 * To access them use: io_hash_desc(), io_real_digest() and io_want_digest().
-	 */
-};
-
 struct dm_verity_prefetch_work {
 	struct work_struct work;
 	struct dm_verity *v;
@@ -113,21 +44,6 @@ struct dm_verity_prefetch_work {
 	unsigned n_blocks;
 };
 
-static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
-{
-	return (struct shash_desc *)(io + 1);
-}
-
-static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
-	return (u8 *)(io + 1) + v->shash_descsize;
-}
-
-static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
-	return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
-}
-
 /*
  * Auxiliary structure appended to each dm-bufio buffer. If the value
  * hash_verified is nonzero, hash of the block has been verified.
@@ -236,8 +152,8 @@ static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
 	return r;
 }
 
-static int verity_hash(struct dm_verity *v, struct shash_desc *desc,
-		       const u8 *data, size_t len, u8 *digest)
+int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+		const u8 *data, size_t len, u8 *digest)
 {
 	int r;
 
@@ -325,12 +241,12 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
  * Verify hash of a metadata block pertaining to the specified data block
  * ("block" argument) at a specified level ("level" argument).
  *
- * On successful return, io_want_digest(v, io) contains the hash value for
- * a lower tree level or for the data block (if we're at the lowest leve).
+ * On successful return, verity_io_want_digest(v, io) contains the hash value
+ * for a lower tree level or for the data block (if we're at the lowest level).
  *
  * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
  * If "skip_unverified" is false, unverified buffer is hashed and verified
- * against current value of io_want_digest(v, io).
+ * against current value of verity_io_want_digest(v, io).
  */
 static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 			       sector_t block, int level, bool skip_unverified,
@@ -357,13 +273,13 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 			goto release_ret_r;
 		}
 
-		r = verity_hash(v, io_hash_desc(v, io),
+		r = verity_hash(v, verity_io_hash_desc(v, io),
 				data, 1 << v->hash_dev_block_bits,
-				io_real_digest(v, io));
+				verity_io_real_digest(v, io));
 		if (unlikely(r < 0))
 			goto release_ret_r;
 
-		if (likely(memcmp(io_real_digest(v, io), want_digest,
+		if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
 				  v->digest_size) == 0))
 			aux->hash_verified = 1;
 		else if (verity_handle_err(v,
@@ -387,8 +303,8 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
  * Find a hash for a given block, write it to digest and verify the integrity
  * of the hash tree if necessary.
  */
-static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
-				 sector_t block, u8 *digest)
+int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+			  sector_t block, u8 *digest)
 {
 	int i;
 	int r;
@@ -430,10 +346,10 @@ static int verity_verify_io(struct dm_verity_io *io)
 	for (b = 0; b < io->n_blocks; b++) {
 		int r;
 		unsigned todo;
-		struct shash_desc *desc = io_hash_desc(v, io);
+		struct shash_desc *desc = verity_io_hash_desc(v, io);
 
 		r = verity_hash_for_block(v, io, io->block + b,
-					  io_want_digest(v, io));
+					  verity_io_want_digest(v, io));
 		if (unlikely(r < 0))
 			return r;
 
@@ -462,12 +378,12 @@ static int verity_verify_io(struct dm_verity_io *io)
 			todo -= len;
 		} while (todo);
 
-		r = verity_hash_final(v, desc, io_real_digest(v, io));
+		r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
 		if (unlikely(r < 0))
 			return r;
 
-		if (likely(memcmp(io_real_digest(v, io),
-				io_want_digest(v, io), v->digest_size) == 0))
+		if (likely(memcmp(verity_io_real_digest(v, io),
+				  verity_io_want_digest(v, io), v->digest_size) == 0))
 			continue;
 		else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
 				io->block + b))
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
new file mode 100644
index 000000000000..c7ad4fd05188
--- /dev/null
+++ b/drivers/md/dm-verity.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef DM_VERITY_H
+#define DM_VERITY_H
+
+#include "dm-bufio.h"
+#include <linux/device-mapper.h>
+#include <crypto/hash.h>
+
+#define DM_VERITY_MAX_LEVELS		63
+
+enum verity_mode {
+	DM_VERITY_MODE_EIO,
+	DM_VERITY_MODE_LOGGING,
+	DM_VERITY_MODE_RESTART
+};
+
+enum verity_block_type {
+	DM_VERITY_BLOCK_TYPE_DATA,
+	DM_VERITY_BLOCK_TYPE_METADATA
+};
+
+struct dm_verity {
+	struct dm_dev *data_dev;
+	struct dm_dev *hash_dev;
+	struct dm_target *ti;
+	struct dm_bufio_client *bufio;
+	char *alg_name;
+	struct crypto_shash *tfm;
+	u8 *root_digest;	/* digest of the root block */
+	u8 *salt;		/* salt: its size is salt_size */
+	unsigned salt_size;
+	sector_t data_start;	/* data offset in 512-byte sectors */
+	sector_t hash_start;	/* hash start in blocks */
+	sector_t data_blocks;	/* the number of data blocks */
+	sector_t hash_blocks;	/* the number of hash blocks */
+	unsigned char data_dev_block_bits;	/* log2(data blocksize) */
+	unsigned char hash_dev_block_bits;	/* log2(hash blocksize) */
+	unsigned char hash_per_block_bits;	/* log2(hashes in hash block) */
+	unsigned char levels;	/* the number of tree levels */
+	unsigned char version;
+	unsigned digest_size;	/* digest size for the current hash algorithm */
+	unsigned shash_descsize;/* the size of temporary space for crypto */
+	int hash_failed;	/* set to 1 if hash of any block failed */
+	enum verity_mode mode;	/* mode for handling verification errors */
+	unsigned corrupted_errs;/* Number of errors for corrupted blocks */
+
+	struct workqueue_struct *verify_wq;
+
+	/* starting blocks for each tree level. 0 is the lowest level. */
+	sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
+};
+
+struct dm_verity_io {
+	struct dm_verity *v;
+
+	/* original value of bio->bi_end_io */
+	bio_end_io_t *orig_bi_end_io;
+
+	sector_t block;
+	unsigned n_blocks;
+
+	struct bvec_iter iter;
+
+	struct work_struct work;
+
+	/*
+	 * Three variably-size fields follow this struct:
+	 *
+	 * u8 hash_desc[v->shash_descsize];
+	 * u8 real_digest[v->digest_size];
+	 * u8 want_digest[v->digest_size];
+	 *
+	 * To access them use: verity_io_hash_desc(), verity_io_real_digest()
+	 * and verity_io_want_digest().
+	 */
+};
+
+static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v,
+						     struct dm_verity_io *io)
+{
+	return (struct shash_desc *)(io + 1);
+}
+
+static inline u8 *verity_io_real_digest(struct dm_verity *v,
+					struct dm_verity_io *io)
+{
+	return (u8 *)(io + 1) + v->shash_descsize;
+}
+
+static inline u8 *verity_io_want_digest(struct dm_verity *v,
+					struct dm_verity_io *io)
+{
+	return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
+}
+
+extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+		       const u8 *data, size_t len, u8 *digest);
+
+extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+				 sector_t block, u8 *digest);
+
+#endif /* DM_VERITY_H */

From c81331d2f4248c76f8d68a0efedb691fe1343596 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 3 Dec 2015 16:30:36 -0500
Subject: [PATCH 056/797] UPSTREAM: dm verity: factor out verity_for_bv_block()

verity_for_bv_block() will be re-used by optional dm-verity object.

Change-Id: I80e0f8e7c9f234fce3fbdf21cb05aba3041d7f98
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit bb4d73ac5e4f0a6c4853f35824f6cb2d396a2f9c)
---
 drivers/md/dm-verity-target.c | 72 ++++++++++++++++++++++++-----------
 drivers/md/dm-verity.h        |  6 +++
 2 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 7e200ba631fb..2b0ee52d1ad8 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -333,19 +333,61 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
 	return 0;
 }
 
+/*
+ * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
+ * starting from iter.
+ */
+int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+			struct bvec_iter *iter,
+			int (*process)(struct dm_verity *v,
+				       struct dm_verity_io *io, u8 *data,
+				       size_t len))
+{
+	unsigned todo = 1 << v->data_dev_block_bits;
+	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
+
+	do {
+		int r;
+		u8 *page;
+		unsigned len;
+		struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+		page = kmap_atomic(bv.bv_page);
+		len = bv.bv_len;
+
+		if (likely(len >= todo))
+			len = todo;
+
+		r = process(v, io, page + bv.bv_offset, len);
+		kunmap_atomic(page);
+
+		if (r < 0)
+			return r;
+
+		bio_advance_iter(bio, iter, len);
+		todo -= len;
+	} while (todo);
+
+	return 0;
+}
+
+static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
+				 u8 *data, size_t len)
+{
+	return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
+}
+
 /*
  * Verify one "dm_verity_io" structure.
  */
 static int verity_verify_io(struct dm_verity_io *io)
 {
 	struct dm_verity *v = io->v;
-	struct bio *bio = dm_bio_from_per_bio_data(io,
-						   v->ti->per_bio_data_size);
+	struct bvec_iter start;
 	unsigned b;
 
 	for (b = 0; b < io->n_blocks; b++) {
 		int r;
-		unsigned todo;
 		struct shash_desc *desc = verity_io_hash_desc(v, io);
 
 		r = verity_hash_for_block(v, io, io->block + b,
@@ -357,26 +399,10 @@ static int verity_verify_io(struct dm_verity_io *io)
 		if (unlikely(r < 0))
 			return r;
 
-		todo = 1 << v->data_dev_block_bits;
-		do {
-			u8 *page;
-			unsigned len;
-			struct bio_vec bv = bio_iter_iovec(bio, io->iter);
-
-			page = kmap_atomic(bv.bv_page);
-			len = bv.bv_len;
-			if (likely(len >= todo))
-				len = todo;
-			r = verity_hash_update(v, desc,  page + bv.bv_offset,
-					       len);
-			kunmap_atomic(page);
-
-			if (unlikely(r < 0))
-				return r;
-
-			bio_advance_iter(bio, &io->iter, len);
-			todo -= len;
-		} while (todo);
+		start = io->iter;
+		r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update);
+		if (unlikely(r < 0))
+			return r;
 
 		r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
 		if (unlikely(r < 0))
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index c7ad4fd05188..f5af52df8e38 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -103,6 +103,12 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
 	return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
 }
 
+extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+			       struct bvec_iter *iter,
+			       int (*process)(struct dm_verity *v,
+					      struct dm_verity_io *io,
+					      u8 *data, size_t len));
+
 extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
 		       const u8 *data, size_t len, u8 *digest);
 

From d5fe9722ab2ef9ac6232164204d3b1d04d3ab75c Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 3 Dec 2015 14:26:30 +0000
Subject: [PATCH 057/797] UPSTREAM: dm verity: add support for forward error
 correction

Add support for correcting corrupted blocks using Reed-Solomon.

This code uses RS(255, N) interleaved across data and hash
blocks. Each error-correcting block covers N bytes evenly
distributed across the combined total data, so that each byte is a
maximum distance away from the others. This makes it possible to
recover from several consecutive corrupted blocks with relatively
small space overhead.

In addition, using verity hashes to locate erasures nearly doubles
the effectiveness of error correction. Being able to detect
corrupted blocks also improves performance, because only corrupted
blocks need to be corrected.

For a 2 GiB partition, RS(255, 253) (two parity bytes for each
253-byte block) can correct up to 16 MiB of consecutive corrupted
blocks if erasures can be located, and 8 MiB if they cannot, with
16 MiB space overhead.

Change-Id: Ife4f8889f7fbf0974bf3ed4be6d3322ae9b4cb0e
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit a739ff3f543afbb4a041c16cd0182c8e8d366e70)
---
 Documentation/device-mapper/verity.txt |  35 +-
 drivers/md/Kconfig                     |  12 +
 drivers/md/Makefile                    |   4 +
 drivers/md/dm-verity-fec.c             | 812 +++++++++++++++++++++++++
 drivers/md/dm-verity-fec.h             | 152 +++++
 drivers/md/dm-verity-target.c          |  55 +-
 drivers/md/dm-verity.h                 |  10 +
 7 files changed, 1071 insertions(+), 9 deletions(-)
 create mode 100644 drivers/md/dm-verity-fec.c
 create mode 100644 drivers/md/dm-verity-fec.h

diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index e15bc1a0fb98..d602c801ff59 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -18,11 +18,11 @@ Construction Parameters
 
     0 is the original format used in the Chromium OS.
       The salt is appended when hashing, digests are stored continuously and
-      the rest of the block is padded with zeros.
+      the rest of the block is padded with zeroes.
 
     1 is the current format that should be used for new devices.
       The salt is prepended when hashing and each digest is
-      padded with zeros to the power of two.
+      padded with zeroes to the power of two.
 
 <dev>
     This is the device containing data, the integrity of which needs to be
@@ -79,6 +79,32 @@ restart_on_corruption
     not compatible with ignore_corruption and requires user space support to
     avoid restart loops.
 
+use_fec_from_device <fec_dev>
+    Use forward error correction (FEC) to recover from corruption if hash
+    verification fails. Use encoding data from the specified device. This
+    may be the same device where data and hash blocks reside, in which case
+    fec_start must be outside data and hash areas.
+
+    If the encoding data covers additional metadata, it must be accessible
+    on the hash device after the hash blocks.
+
+    Note: block sizes for data and hash devices must match. Also, if the
+    verity <dev> is encrypted the <fec_dev> should be too.
+
+fec_roots <num>
+    Number of generator roots. This equals the number of parity bytes in
+    the encoding data. For example, in RS(M, N) encoding, the number of roots
+    is M-N.
+
+fec_blocks <num>
+    The number of encoding data blocks on the FEC device. The block size for
+    the FEC device is <data_block_size>.
+
+fec_start <offset>
+    This is the offset, in <data_block_size> blocks, from the start of the
+    FEC device to the beginning of the encoding data.
+
+
 Theory of operation
 ===================
 
@@ -98,6 +124,11 @@ per-block basis. This allows for a lightweight hash computation on first read
 into the page cache. Block hashes are stored linearly, aligned to the nearest
 block size.
 
+If forward error correction (FEC) support is enabled any recovery of
+corrupted data will be verified using the cryptographic hash of the
+corresponding data. This is why combining error correction with
+integrity checking is essential.
+
 Hash Tree
 ---------
 
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 7913fdcfc849..d8b0ab6f3753 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -458,6 +458,18 @@ config DM_VERITY
 
 	  If unsure, say N.
 
+config DM_VERITY_FEC
+	bool "Verity forward error correction support"
+	depends on DM_VERITY
+	select REED_SOLOMON
+	select REED_SOLOMON_DEC8
+	---help---
+	  Add forward error correction support to dm-verity. This option
+	  makes it possible to use pre-generated error correction data to
+	  recover from corrupted blocks.
+
+	  If unsure, say N.
+
 config DM_SWITCH
 	tristate "Switch target support (EXPERIMENTAL)"
 	depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 94e9f6bb33d1..62a65764e8e0 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -64,3 +64,7 @@ obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
 endif
+
+ifeq ($(CONFIG_DM_VERITY_FEC),y)
+dm-verity-objs			+= dm-verity-fec.o
+endif
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
new file mode 100644
index 000000000000..88143d36a1d2
--- /dev/null
+++ b/drivers/md/dm-verity-fec.c
@@ -0,0 +1,812 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include "dm-verity-fec.h"
+#include <linux/math64.h>
+
+#define DM_MSG_PREFIX	"verity-fec"
+
+/*
+ * If error correction has been configured, returns true.
+ */
+bool verity_fec_is_enabled(struct dm_verity *v)
+{
+	return v->fec && v->fec->dev;
+}
+
+/*
+ * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
+ * length fields.
+ */
+static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
+{
+	return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+}
+
+/*
+ * Return an interleaved offset for a byte in RS block.
+ */
+static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
+{
+	u32 mod;
+
+	mod = do_div(offset, v->fec->rsn);
+	return offset + mod * (v->fec->rounds << v->data_dev_block_bits);
+}
+
+/*
+ * Decode an RS block using Reed-Solomon.
+ */
+static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
+			  u8 *data, u8 *fec, int neras)
+{
+	int i;
+	uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
+
+	for (i = 0; i < v->fec->roots; i++)
+		par[i] = fec[i];
+
+	return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
+			  fio->erasures, 0, NULL);
+}
+
+/*
+ * Read error-correcting codes for the requested RS block. Returns a pointer
+ * to the data block. Caller is responsible for releasing buf.
+ */
+static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
+			   unsigned *offset, struct dm_buffer **buf)
+{
+	u64 position, block;
+	u8 *res;
+
+	position = (index + rsb) * v->fec->roots;
+	block = position >> v->data_dev_block_bits;
+	*offset = (unsigned)(position - (block << v->data_dev_block_bits));
+
+	res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+	if (unlikely(IS_ERR(res))) {
+		DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
+		      v->data_dev->name, (unsigned long long)rsb,
+		      (unsigned long long)(v->fec->start + block),
+		      PTR_ERR(res));
+		*buf = NULL;
+	}
+
+	return res;
+}
+
+/* Loop over each preallocated buffer slot. */
+#define fec_for_each_prealloc_buffer(__i) \
+	for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)
+
+/* Loop over each extra buffer slot. */
+#define fec_for_each_extra_buffer(io, __i) \
+	for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)
+
+/* Loop over each allocated buffer. */
+#define fec_for_each_buffer(io, __i) \
+	for (__i = 0; __i < (io)->nbufs; __i++)
+
+/* Loop over each RS block in each allocated buffer. */
+#define fec_for_each_buffer_rs_block(io, __i, __j) \
+	fec_for_each_buffer(io, __i) \
+		for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++)
+
+/*
+ * Return a pointer to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline u8 *fec_buffer_rs_block(struct dm_verity *v,
+				      struct dm_verity_fec_io *fio,
+				      unsigned i, unsigned j)
+{
+	return &fio->bufs[i][j * v->fec->rsn];
+}
+
+/*
+ * Return an index to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j)
+{
+	return (i << DM_VERITY_FEC_BUF_RS_BITS) + j;
+}
+
+/*
+ * Decode all RS blocks from buffers and copy corrected bytes into fio->output
+ * starting from block_offset. Returns the number of corrected errors or < 0.
+ */
+static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
+			   u64 rsb, int byte_index, unsigned block_offset,
+			   int neras)
+{
+	int r, corrected = 0, res;
+	struct dm_buffer *buf;
+	unsigned n, i, offset;
+	u8 *par, *block;
+
+	par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+	if (IS_ERR(par))
+		return PTR_ERR(par);
+
+	/*
+	 * Decode the RS blocks we have in bufs. Each RS block results in
+	 * one corrected target byte and consumes fec->roots parity bytes.
+	 */
+	fec_for_each_buffer_rs_block(fio, n, i) {
+		block = fec_buffer_rs_block(v, fio, n, i);
+		res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+		if (res < 0) {
+			r = res;
+			goto error;
+		}
+
+		corrected += res;
+		fio->output[block_offset] = block[byte_index];
+
+		block_offset++;
+		if (block_offset >= 1 << v->data_dev_block_bits)
+			goto done;
+
+		/* read the next block when we run out of parity bytes */
+		offset += v->fec->roots;
+		if (offset >= 1 << v->data_dev_block_bits) {
+			dm_bufio_release(buf);
+
+			par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+			if (unlikely(IS_ERR(par)))
+				return PTR_ERR(par);
+		}
+	}
+done:
+	r = corrected;
+error:
+	/* every path reaching here still holds the parity buffer */
+	dm_bufio_release(buf);
+	if (r < 0 && neras)
+		DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
+			    v->data_dev->name, (unsigned long long)rsb, r);
+	else if (r > 0)
+		DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
+			     v->data_dev->name, (unsigned long long)rsb, r);
+
+	return r;
+}
+
+/*
+ * Locate data block erasures using verity hashes.
+ */
+static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
+			  u8 *want_digest, u8 *data)
+{
+	if (unlikely(verity_hash(v, verity_io_hash_desc(v, io),
+				 data, 1 << v->data_dev_block_bits,
+				 verity_io_real_digest(v, io))))
+		return 0;
+
+	return memcmp(verity_io_real_digest(v, io), want_digest,
+		      v->digest_size) != 0;
+}
+
+/*
+ * Read data blocks that are part of the RS block and deinterleave as much as
+ * fits into buffers. Check for erasure locations if @neras is non-NULL.
+ */
+static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
+			 u64 rsb, u64 target, unsigned block_offset,
+			 int *neras)
+{
+	int i, j, target_index = -1;
+	struct dm_buffer *buf;
+	struct dm_bufio_client *bufio;
+	struct dm_verity_fec_io *fio = fec_io(io);
+	u64 block, ileaved;
+	u8 *bbuf, *rs_block;
+	u8 want_digest[v->digest_size];
+	unsigned n, k;
+
+	if (neras)
+		*neras = 0;
+
+	/*
+	 * read each of the rsn data blocks that are part of the RS block, and
+	 * interleave contents to available bufs
+	 */
+	for (i = 0; i < v->fec->rsn; i++) {
+		ileaved = fec_interleave(v, rsb * v->fec->rsn + i);
+
+		/*
+		 * target is the data block we want to correct, target_index is
+		 * the index of this block within the rsn RS blocks
+		 */
+		if (ileaved == target)
+			target_index = i;
+
+		block = ileaved >> v->data_dev_block_bits;
+		bufio = v->fec->data_bufio;
+
+		if (block >= v->data_blocks) {
+			block -= v->data_blocks;
+
+			/*
+			 * blocks outside the area were assumed to contain
+			 * zeros when encoding data was generated
+			 */
+			if (unlikely(block >= v->fec->hash_blocks))
+				continue;
+
+			block += v->hash_start;
+			bufio = v->bufio;
+		}
+
+		bbuf = dm_bufio_read(bufio, block, &buf);
+		if (unlikely(IS_ERR(bbuf))) {
+			DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
+				     v->data_dev->name,
+				     (unsigned long long)rsb,
+				     (unsigned long long)block, PTR_ERR(bbuf));
+
+			/* assume the block is corrupted */
+			if (neras && *neras <= v->fec->roots)
+				fio->erasures[(*neras)++] = i;
+
+			continue;
+		}
+
+		/* locate erasures if the block is on the data device */
+		if (bufio == v->fec->data_bufio &&
+		    verity_hash_for_block(v, io, block, want_digest) == 0) {
+			/*
+			 * skip if we have already found the theoretical
+			 * maximum number (i.e. fec->roots) of erasures
+			 */
+			if (neras && *neras <= v->fec->roots &&
+			    fec_is_erasure(v, io, want_digest, bbuf))
+				fio->erasures[(*neras)++] = i;
+		}
+
+		/*
+		 * deinterleave and copy the bytes that fit into bufs,
+		 * starting from block_offset
+		 */
+		fec_for_each_buffer_rs_block(fio, n, j) {
+			k = fec_buffer_rs_index(n, j) + block_offset;
+
+			if (k >= 1 << v->data_dev_block_bits)
+				goto done;
+
+			rs_block = fec_buffer_rs_block(v, fio, n, j);
+			rs_block[i] = bbuf[k];
+		}
+done:
+		dm_bufio_release(buf);
+	}
+
+	return target_index;
+}
+
+/*
+ * Allocate the RS control structure, FEC buffers and output buffer from
+ * mempools, plus as many extra buffers as available. Returns 0 or -ENOMEM.
+ */
+static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+	unsigned n;
+
+	if (!fio->rs) {
+		fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO);
+		if (unlikely(!fio->rs)) {
+			DMERR("failed to allocate RS");
+			return -ENOMEM;
+		}
+	}
+
+	fec_for_each_prealloc_buffer(n) {
+		if (fio->bufs[n])
+			continue;
+
+		fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO);
+		if (unlikely(!fio->bufs[n])) {
+			DMERR("failed to allocate FEC buffer");
+			return -ENOMEM;
+		}
+	}
+
+	/* try to allocate the maximum number of buffers */
+	fec_for_each_extra_buffer(fio, n) {
+		if (fio->bufs[n])
+			continue;
+
+		fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO);
+		/* we can manage with even one buffer if necessary */
+		if (unlikely(!fio->bufs[n]))
+			break;
+	}
+	fio->nbufs = n;
+
+	if (!fio->output) {
+		fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
+
+		if (!fio->output) {
+			DMERR("failed to allocate FEC page");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
+ * zeroed before deinterleaving.
+ */
+static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+	unsigned n;
+
+	fec_for_each_buffer(fio, n)
+		memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS);
+
+	memset(fio->erasures, 0, sizeof(fio->erasures));
+}
+
+/*
+ * Decode all RS blocks in a single data block and return the target block
+ * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
+ * hashes to locate erasures.
+ */
+static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
+			  struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
+			  bool use_erasures)
+{
+	int r, neras = 0;
+	unsigned pos;
+
+	r = fec_alloc_bufs(v, fio);
+	if (unlikely(r < 0))
+		return r;
+
+	for (pos = 0; pos < 1 << v->data_dev_block_bits; ) {
+		fec_init_bufs(v, fio);
+
+		r = fec_read_bufs(v, io, rsb, offset, pos,
+				  use_erasures ? &neras : NULL);
+		if (unlikely(r < 0))
+			return r;
+
+		r = fec_decode_bufs(v, fio, rsb, r, pos, neras);
+		if (r < 0)
+			return r;
+
+		pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS;
+	}
+
+	/* Always re-validate the corrected block against the expected hash */
+	r = verity_hash(v, verity_io_hash_desc(v, io), fio->output,
+			1 << v->data_dev_block_bits,
+			verity_io_real_digest(v, io));
+	if (unlikely(r < 0))
+		return r;
+
+	if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
+		   v->digest_size)) {
+		DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
+			    v->data_dev->name, (unsigned long long)rsb, neras);
+		return -EILSEQ;
+	}
+
+	return 0;
+}
+
+static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data,
+		       size_t len)
+{
+	struct dm_verity_fec_io *fio = fec_io(io);
+
+	memcpy(data, &fio->output[fio->output_pos], len);
+	fio->output_pos += len;
+
+	return 0;
+}
+
+/*
+ * Correct errors in a block. Copies corrected block to dest if non-NULL,
+ * otherwise to a bio_vec starting from iter. Returns 0 or < 0 on failure.
+ */
+int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+		      enum verity_block_type type, sector_t block, u8 *dest,
+		      struct bvec_iter *iter)
+{
+	int r;
+	struct dm_verity_fec_io *fio = fec_io(io);
+	u64 offset, res, rsb;
+
+	if (!verity_fec_is_enabled(v))
+		return -EOPNOTSUPP;
+
+	if (type == DM_VERITY_BLOCK_TYPE_METADATA)
+		block += v->data_blocks;
+
+	/*
+	 * For RS(M, N), the continuous FEC data is divided into blocks of N
+	 * bytes. Since block size may not be divisible by N, the last block
+	 * is zero padded when decoding.
+	 *
+	 * Each byte of the block is covered by a different RS(M, N) code,
+	 * and each code is interleaved over N blocks to make it less likely
+	 * that bursty corruption will leave us in unrecoverable state.
+	 */
+
+	offset = block << v->data_dev_block_bits;
+
+	/* div64_u64() returns the quotient and leaves its arguments intact */
+	res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits);
+
+	/*
+	 * The base RS block we can feed to the interleaver to find out all
+	 * blocks required for decoding.
+	 */
+	rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits);
+
+	/*
+	 * Locating erasures is slow, so attempt to recover the block without
+	 * them first. Do a second attempt with erasures if the corruption is
+	 * bad enough.
+	 */
+	r = fec_decode_rsb(v, io, fio, rsb, offset, false);
+	if (r < 0) {
+		r = fec_decode_rsb(v, io, fio, rsb, offset, true);
+		if (r < 0)
+			return r;
+	}
+
+	if (dest)
+		memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
+	else if (iter) {
+		fio->output_pos = 0;
+		r = verity_for_bv_block(v, io, iter, fec_bv_copy);
+	}
+
+	return r;
+}
+
+/*
+ * Clean up per-bio data.
+ */
+void verity_fec_finish_io(struct dm_verity_io *io)
+{
+	unsigned n;
+	struct dm_verity_fec *f = io->v->fec;
+	struct dm_verity_fec_io *fio = fec_io(io);
+
+	if (!verity_fec_is_enabled(io->v))
+		return;
+
+	mempool_free(fio->rs, f->rs_pool);
+
+	fec_for_each_prealloc_buffer(n)
+		mempool_free(fio->bufs[n], f->prealloc_pool);
+
+	fec_for_each_extra_buffer(fio, n)
+		mempool_free(fio->bufs[n], f->extra_pool);
+
+	mempool_free(fio->output, f->output_pool);
+}
+
+/*
+ * Initialize per-bio data.
+ */
+void verity_fec_init_io(struct dm_verity_io *io)
+{
+	struct dm_verity_fec_io *fio = fec_io(io);
+
+	if (!verity_fec_is_enabled(io->v))
+		return;
+
+	fio->rs = NULL;
+	memset(fio->bufs, 0, sizeof(fio->bufs));
+	fio->nbufs = 0;
+	fio->output = NULL;
+}
+
+/*
+ * Append feature arguments and values to the status table.
+ */
+unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+				 char *result, unsigned maxlen)
+{
+	if (!verity_fec_is_enabled(v))
+		return sz;
+
+	DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
+	       DM_VERITY_OPT_FEC_BLOCKS " %llu "
+	       DM_VERITY_OPT_FEC_START " %llu "
+	       DM_VERITY_OPT_FEC_ROOTS " %d",
+	       v->fec->dev->name,
+	       (unsigned long long)v->fec->blocks,
+	       (unsigned long long)v->fec->start,
+	       v->fec->roots);
+
+	return sz;
+}
+
+void verity_fec_dtr(struct dm_verity *v)
+{
+	struct dm_verity_fec *f = v->fec;
+
+	if (!verity_fec_is_enabled(v))
+		goto out;
+
+	mempool_destroy(f->rs_pool);
+	mempool_destroy(f->prealloc_pool);
+	mempool_destroy(f->extra_pool);
+	mempool_destroy(f->output_pool);
+	kmem_cache_destroy(f->cache);
+	if (f->data_bufio)
+		dm_bufio_client_destroy(f->data_bufio);
+	if (f->bufio)
+		dm_bufio_client_destroy(f->bufio);
+
+	if (f->dev)
+		dm_put_device(v->ti, f->dev);
+out:
+	kfree(f);
+	v->fec = NULL;
+}
+
+static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	struct dm_verity *v = (struct dm_verity *)pool_data;
+
+	return init_rs(8, 0x11d, 0, 1, v->fec->roots);
+}
+
+static void fec_rs_free(void *element, void *pool_data)
+{
+	struct rs_control *rs = (struct rs_control *)element;
+
+	if (rs)
+		free_rs(rs);
+}
+
+bool verity_is_fec_opt_arg(const char *arg_name)
+{
+	return (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV) ||
+		!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS) ||
+		!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START) ||
+		!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS));
+}
+
+int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+			      unsigned *argc, const char *arg_name)
+{
+	int r;
+	struct dm_target *ti = v->ti;
+	const char *arg_value;
+	unsigned long long num_ll;
+	unsigned char num_c;
+	char dummy;
+
+	if (!*argc) {
+		ti->error = "FEC feature arguments require a value";
+		return -EINVAL;
+	}
+
+	arg_value = dm_shift_arg(as);
+	(*argc)--;
+
+	if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
+		r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev);
+		if (r) {
+			ti->error = "FEC device lookup failed";
+			return r;
+		}
+
+	} else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) {
+		if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
+		    ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+		     >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
+			ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+			return -EINVAL;
+		}
+		v->fec->blocks = num_ll;
+
+	} else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) {
+		if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
+		    ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >>
+		     (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
+			ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
+			return -EINVAL;
+		}
+		v->fec->start = num_ll;
+
+	} else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) {
+		if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c ||
+		    num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) ||
+		    num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) {
+			ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
+			return -EINVAL;
+		}
+		v->fec->roots = num_c;
+
+	} else {
+		ti->error = "Unrecognized verity FEC feature request";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
+ */
+int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+	struct dm_verity_fec *f;
+
+	f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL);
+	if (!f) {
+		v->ti->error = "Cannot allocate FEC structure";
+		return -ENOMEM;
+	}
+	v->fec = f;
+
+	return 0;
+}
+
+/*
+ * Validate arguments and preallocate memory. Must be called after arguments
+ * have been parsed using verity_fec_parse_opt_args.
+ */
+int verity_fec_ctr(struct dm_verity *v)
+{
+	struct dm_verity_fec *f = v->fec;
+	struct dm_target *ti = v->ti;
+	u64 hash_blocks;
+
+	if (!verity_fec_is_enabled(v)) {
+		verity_fec_dtr(v);
+		return 0;
+	}
+
+	/*
+	 * FEC is computed over data blocks, possible metadata, and
+	 * hash blocks. In other words, FEC covers total of fec_blocks
+	 * blocks consisting of the following:
+	 *
+	 *  data blocks | hash blocks | metadata (optional)
+	 *
+	 * We allow metadata after hash blocks to support a use case
+	 * where all data is stored on the same device and FEC covers
+	 * the entire area.
+	 *
+	 * If metadata is included, we require it to be available on the
+	 * hash device after the hash blocks.
+	 */
+
+	hash_blocks = v->hash_blocks - v->hash_start;
+
+	/*
+	 * Require matching block sizes for data and hash devices for
+	 * simplicity.
+	 */
+	if (v->data_dev_block_bits != v->hash_dev_block_bits) {
+		ti->error = "Block sizes must match to use FEC";
+		return -EINVAL;
+	}
+
+	if (!f->roots) {
+		ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
+		return -EINVAL;
+	}
+	f->rsn = DM_VERITY_FEC_RSM - f->roots;
+
+	if (!f->blocks) {
+		ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
+		return -EINVAL;
+	}
+
+	f->rounds = f->blocks;
+	if (sector_div(f->rounds, f->rsn))
+		f->rounds++;
+
+	/*
+	 * Due to optional metadata, f->blocks can be larger than
+	 * data_blocks and hash_blocks combined.
+	 */
+	if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) {
+		ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+		return -EINVAL;
+	}
+
+	/*
+	 * Metadata is accessed through the hash device, so we require
+	 * it to be large enough.
+	 */
+	f->hash_blocks = f->blocks - v->data_blocks;
+	if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) {
+		ti->error = "Hash device is too small for "
+			DM_VERITY_OPT_FEC_BLOCKS;
+		return -E2BIG;
+	}
+
+	f->bufio = dm_bufio_client_create(f->dev->bdev,
+					  1 << v->data_dev_block_bits,
+					  1, 0, NULL, NULL);
+	if (IS_ERR(f->bufio)) {
+		ti->error = "Cannot initialize FEC bufio client";
+		return PTR_ERR(f->bufio);
+	}
+
+	if (dm_bufio_get_device_size(f->bufio) <
+	    ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+		ti->error = "FEC device is too small";
+		return -E2BIG;
+	}
+
+	f->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
+					       1 << v->data_dev_block_bits,
+					       1, 0, NULL, NULL);
+	if (IS_ERR(f->data_bufio)) {
+		ti->error = "Cannot initialize FEC data bufio client";
+		return PTR_ERR(f->data_bufio);
+	}
+
+	if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) {
+		ti->error = "Data device is too small";
+		return -E2BIG;
+	}
+
+	/* Preallocate an rs_control structure for each worker thread */
+	f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
+				    fec_rs_free, (void *) v);
+	if (!f->rs_pool) {
+		ti->error = "Cannot allocate RS pool";
+		return -ENOMEM;
+	}
+
+	f->cache = kmem_cache_create("dm_verity_fec_buffers",
+				     f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
+				     0, 0, NULL);
+	if (!f->cache) {
+		ti->error = "Cannot create FEC buffer cache";
+		return -ENOMEM;
+	}
+
+	/* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
+	f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
+						    DM_VERITY_FEC_BUF_PREALLOC,
+						    f->cache);
+	if (!f->prealloc_pool) {
+		ti->error = "Cannot allocate FEC buffer prealloc pool";
+		return -ENOMEM;
+	}
+
+	f->extra_pool = mempool_create_slab_pool(0, f->cache);
+	if (!f->extra_pool) {
+		ti->error = "Cannot allocate FEC buffer extra pool";
+		return -ENOMEM;
+	}
+
+	/* Preallocate an output buffer for each thread */
+	f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
+						     1 << v->data_dev_block_bits);
+	if (!f->output_pool) {
+		ti->error = "Cannot allocate FEC output pool";
+		return -ENOMEM;
+	}
+
+	/* Reserve space for our per-bio data */
+	ti->per_bio_data_size += sizeof(struct dm_verity_fec_io);
+
+	return 0;
+}
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
new file mode 100644
index 000000000000..7fa0298b995e
--- /dev/null
+++ b/drivers/md/dm-verity-fec.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef DM_VERITY_FEC_H
+#define DM_VERITY_FEC_H
+
+#include "dm-verity.h"
+#include <linux/rslib.h>
+
+/* Reed-Solomon(M, N) parameters */
+#define DM_VERITY_FEC_RSM		255
+#define DM_VERITY_FEC_MAX_RSN		253
+#define DM_VERITY_FEC_MIN_RSN		231	/* ~10% space overhead */
+
+/* buffers for deinterleaving and decoding */
+#define DM_VERITY_FEC_BUF_PREALLOC	1	/* buffers to preallocate */
+#define DM_VERITY_FEC_BUF_RS_BITS	4	/* 1 << RS blocks per buffer */
+/* we need buffers for at most 1 << block size RS blocks */
+#define DM_VERITY_FEC_BUF_MAX \
+	(1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+
+#define DM_VERITY_OPT_FEC_DEV		"use_fec_from_device"
+#define DM_VERITY_OPT_FEC_BLOCKS	"fec_blocks"
+#define DM_VERITY_OPT_FEC_START		"fec_start"
+#define DM_VERITY_OPT_FEC_ROOTS		"fec_roots"
+
+/* configuration */
+struct dm_verity_fec {
+	struct dm_dev *dev;	/* parity data device */
+	struct dm_bufio_client *data_bufio;	/* for data dev access */
+	struct dm_bufio_client *bufio;		/* for parity data access */
+	sector_t start;		/* parity data start in blocks */
+	sector_t blocks;	/* number of blocks covered */
+	sector_t rounds;	/* number of interleaving rounds */
+	sector_t hash_blocks;	/* blocks covered after v->hash_start */
+	unsigned char roots;	/* number of parity bytes, M-N of RS(M, N) */
+	unsigned char rsn;	/* N of RS(M, N) */
+	mempool_t *rs_pool;	/* mempool for fio->rs */
+	mempool_t *prealloc_pool;	/* mempool for preallocated buffers */
+	mempool_t *extra_pool;	/* mempool for extra buffers */
+	mempool_t *output_pool;	/* mempool for output */
+	struct kmem_cache *cache;	/* cache for buffers */
+};
+
+/* per-bio data */
+struct dm_verity_fec_io {
+	struct rs_control *rs;	/* Reed-Solomon state */
+	int erasures[DM_VERITY_FEC_MAX_RSN];	/* erasures for decode_rs8 */
+	u8 *bufs[DM_VERITY_FEC_BUF_MAX];	/* bufs for deinterleaving */
+	unsigned nbufs;		/* number of buffers allocated */
+	u8 *output;		/* buffer for corrected output */
+	size_t output_pos;
+};
+
+#ifdef CONFIG_DM_VERITY_FEC
+
+/* each feature parameter requires a value */
+#define DM_VERITY_OPTS_FEC	8
+
+extern bool verity_fec_is_enabled(struct dm_verity *v);
+
+extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+			     enum verity_block_type type, sector_t block,
+			     u8 *dest, struct bvec_iter *iter);
+
+extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+					char *result, unsigned maxlen);
+
+extern void verity_fec_finish_io(struct dm_verity_io *io);
+extern void verity_fec_init_io(struct dm_verity_io *io);
+
+extern bool verity_is_fec_opt_arg(const char *arg_name);
+extern int verity_fec_parse_opt_args(struct dm_arg_set *as,
+				     struct dm_verity *v, unsigned *argc,
+				     const char *arg_name);
+
+extern void verity_fec_dtr(struct dm_verity *v);
+
+extern int verity_fec_ctr_alloc(struct dm_verity *v);
+extern int verity_fec_ctr(struct dm_verity *v);
+
+#else /* !CONFIG_DM_VERITY_FEC */
+
+#define DM_VERITY_OPTS_FEC	0
+
+static inline bool verity_fec_is_enabled(struct dm_verity *v)
+{
+	return false;
+}
+
+static inline int verity_fec_decode(struct dm_verity *v,
+				    struct dm_verity_io *io,
+				    enum verity_block_type type,
+				    sector_t block, u8 *dest,
+				    struct bvec_iter *iter)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline unsigned verity_fec_status_table(struct dm_verity *v,
+					       unsigned sz, char *result,
+					       unsigned maxlen)
+{
+	return sz;
+}
+
+static inline void verity_fec_finish_io(struct dm_verity_io *io)
+{
+}
+
+static inline void verity_fec_init_io(struct dm_verity_io *io)
+{
+}
+
+static inline bool verity_is_fec_opt_arg(const char *arg_name)
+{
+	return false;
+}
+
+static inline int verity_fec_parse_opt_args(struct dm_arg_set *as,
+					    struct dm_verity *v,
+					    unsigned *argc,
+					    const char *arg_name)
+{
+	return -EINVAL;
+}
+
+static inline void verity_fec_dtr(struct dm_verity *v)
+{
+}
+
+static inline int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+	return 0;
+}
+
+static inline int verity_fec_ctr(struct dm_verity *v)
+{
+	return 0;
+}
+
+#endif /* CONFIG_DM_VERITY_FEC */
+
+#endif /* DM_VERITY_FEC_H */
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 2b0ee52d1ad8..4f90ec2c6b7a 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -15,6 +15,7 @@
  */
 
 #include "dm-verity.h"
+#include "dm-verity-fec.h"
 
 #include <linux/module.h>
 #include <linux/reboot.h>
@@ -31,7 +32,7 @@
 #define DM_VERITY_OPT_LOGGING		"ignore_corruption"
 #define DM_VERITY_OPT_RESTART		"restart_on_corruption"
 
-#define DM_VERITY_OPTS_MAX		1
+#define DM_VERITY_OPTS_MAX		(1 + DM_VERITY_OPTS_FEC)
 
 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
 
@@ -282,6 +283,10 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 		if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
 				  v->digest_size) == 0))
 			aux->hash_verified = 1;
+		else if (verity_fec_decode(v, io,
+					   DM_VERITY_BLOCK_TYPE_METADATA,
+					   hash_block, data, NULL) == 0)
+			aux->hash_verified = 1;
 		else if (verity_handle_err(v,
 					   DM_VERITY_BLOCK_TYPE_METADATA,
 					   hash_block)) {
@@ -411,8 +416,11 @@ static int verity_verify_io(struct dm_verity_io *io)
 		if (likely(memcmp(verity_io_real_digest(v, io),
 				  verity_io_want_digest(v, io), v->digest_size) == 0))
 			continue;
+		else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+					   io->block + b, NULL, &start) == 0)
+			continue;
 		else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
-				io->block + b))
+					   io->block + b))
 			return -EIO;
 	}
 
@@ -430,6 +438,8 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
 	bio->bi_end_io = io->orig_bi_end_io;
 	bio->bi_error = error;
 
+	verity_fec_finish_io(io);
+
 	bio_endio(bio);
 }
 
@@ -444,7 +454,7 @@ static void verity_end_io(struct bio *bio)
 {
 	struct dm_verity_io *io = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
 		verity_finish_io(io, bio->bi_error);
 		return;
 	}
@@ -547,6 +557,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	bio->bi_private = io;
 	io->iter = bio->bi_iter;
 
+	verity_fec_init_io(io);
+
 	verity_submit_prefetch(v, io);
 
 	generic_make_request(bio);
@@ -561,6 +573,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
 			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct dm_verity *v = ti->private;
+	unsigned args = 0;
 	unsigned sz = 0;
 	unsigned x;
 
@@ -587,8 +600,15 @@ static void verity_status(struct dm_target *ti, status_type_t type,
 		else
 			for (x = 0; x < v->salt_size; x++)
 				DMEMIT("%02x", v->salt[x]);
+		if (v->mode != DM_VERITY_MODE_EIO)
+			args++;
+		if (verity_fec_is_enabled(v))
+			args += DM_VERITY_OPTS_FEC;
+		if (!args)
+			return;
+		DMEMIT(" %u", args);
 		if (v->mode != DM_VERITY_MODE_EIO) {
-			DMEMIT(" 1 ");
+			DMEMIT(" ");
 			switch (v->mode) {
 			case DM_VERITY_MODE_LOGGING:
 				DMEMIT(DM_VERITY_OPT_LOGGING);
@@ -600,6 +620,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
 				BUG();
 			}
 		}
+		sz = verity_fec_status_table(v, sz, result, maxlen);
 		break;
 	}
 }
@@ -662,6 +683,8 @@ static void verity_dtr(struct dm_target *ti)
 	if (v->data_dev)
 		dm_put_device(ti, v->data_dev);
 
+	verity_fec_dtr(v);
+
 	kfree(v);
 }
 
@@ -694,6 +717,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
 		} else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
 			v->mode = DM_VERITY_MODE_RESTART;
 			continue;
+
+		} else if (verity_is_fec_opt_arg(arg_name)) {
+			r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
+			if (r)
+				return r;
+			continue;
 		}
 
 		ti->error = "Unrecognized verity feature request";
@@ -736,6 +765,10 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->private = v;
 	v->ti = ti;
 
+	r = verity_fec_ctr_alloc(v);
+	if (r)
+		goto bad;
+
 	if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
 		ti->error = "Device must be readonly";
 		r = -EINVAL;
@@ -924,8 +957,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto bad;
 	}
 
-	ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
-
 	/* WQ_UNBOUND greatly improves performance when running on ramdisk */
 	v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
 	if (!v->verify_wq) {
@@ -934,6 +965,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto bad;
 	}
 
+	ti->per_bio_data_size = sizeof(struct dm_verity_io) +
+				v->shash_descsize + v->digest_size * 2;
+
+	r = verity_fec_ctr(v);
+	if (r)
+		goto bad;
+
+	ti->per_bio_data_size = roundup(ti->per_bio_data_size,
+					__alignof__(struct dm_verity_io));
+
 	return 0;
 
 bad:
@@ -944,7 +985,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 static struct target_type verity_target = {
 	.name		= "verity",
-	.version	= {1, 2, 0},
+	.version	= {1, 3, 0},
 	.module		= THIS_MODULE,
 	.ctr		= verity_ctr,
 	.dtr		= verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index f5af52df8e38..8e853722f6c6 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -29,6 +29,8 @@ enum verity_block_type {
 	DM_VERITY_BLOCK_TYPE_METADATA
 };
 
+struct dm_verity_fec;
+
 struct dm_verity {
 	struct dm_dev *data_dev;
 	struct dm_dev *hash_dev;
@@ -58,6 +60,8 @@ struct dm_verity {
 
 	/* starting blocks for each tree level. 0 is the lowest level. */
 	sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
+
+	struct dm_verity_fec *fec;	/* forward error correction */
 };
 
 struct dm_verity_io {
@@ -103,6 +107,12 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
 	return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
 }
 
+static inline u8 *verity_io_digest_end(struct dm_verity *v,
+				       struct dm_verity_io *io)
+{
+	return verity_io_want_digest(v, io) + v->digest_size;
+}
+
 extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
 			       struct bvec_iter *iter,
 			       int (*process)(struct dm_verity *v,

From 4cd7f1ecf798206533dfc11279c9264198e5c3bf Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Thu, 3 Dec 2015 14:26:31 +0000
Subject: [PATCH 058/797] UPSTREAM: dm verity: add ignore_zero_blocks feature

If ignore_zero_blocks is enabled dm-verity will return zeroes for blocks
matching a zero hash without validating the content.

Change-Id: I728fa4b2586b29f2793ea5cb014289892819d249
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit 0cc37c2df4fa0aa702f9662edce4b7ce12c86b7a)
---
 Documentation/device-mapper/verity.txt |  5 ++
 drivers/md/dm-verity-fec.c             |  8 ++-
 drivers/md/dm-verity-target.c          | 87 +++++++++++++++++++++++---
 drivers/md/dm-verity.h                 |  3 +-
 4 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index d602c801ff59..89fd8f9a259f 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -79,6 +79,11 @@ restart_on_corruption
     not compatible with ignore_corruption and requires user space support to
     avoid restart loops.
 
+ignore_zero_blocks
+    Do not verify blocks that are expected to contain zeroes and always return
+    zeroes instead. This may be useful if the partition contains unused blocks
+    that are not guaranteed to contain zeroes.
+
 use_fec_from_device <fec_dev>
     Use forward error correction (FEC) to recover from corruption if hash
     verification fails. Use encoding data from the specified device. This
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 88143d36a1d2..1cc10c4de701 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -205,6 +205,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
 			 u64 rsb, u64 target, unsigned block_offset,
 			 int *neras)
 {
+	bool is_zero;
 	int i, j, target_index = -1;
 	struct dm_buffer *buf;
 	struct dm_bufio_client *bufio;
@@ -264,7 +265,12 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
 
 		/* locate erasures if the block is on the data device */
 		if (bufio == v->fec->data_bufio &&
-		    verity_hash_for_block(v, io, block, want_digest) == 0) {
+		    verity_hash_for_block(v, io, block, want_digest,
+					  &is_zero) == 0) {
+			/* skip known zero blocks entirely */
+			if (is_zero)
+				continue;
+
 			/*
 			 * skip if we have already found the theoretical
 			 * maximum number (i.e. fec->roots) of erasures
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 4f90ec2c6b7a..5c5d30cb6ec5 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -31,8 +31,9 @@
 
 #define DM_VERITY_OPT_LOGGING		"ignore_corruption"
 #define DM_VERITY_OPT_RESTART		"restart_on_corruption"
+#define DM_VERITY_OPT_IGN_ZEROES	"ignore_zero_blocks"
 
-#define DM_VERITY_OPTS_MAX		(1 + DM_VERITY_OPTS_FEC)
+#define DM_VERITY_OPTS_MAX		(2 + DM_VERITY_OPTS_FEC)
 
 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
 
@@ -309,10 +310,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
  * of the hash tree if necessary.
  */
 int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
-			  sector_t block, u8 *digest)
+			  sector_t block, u8 *digest, bool *is_zero)
 {
-	int i;
-	int r;
+	int r = 0, i;
 
 	if (likely(v->levels)) {
 		/*
@@ -324,7 +324,7 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
 		 */
 		r = verity_verify_level(v, io, block, 0, true, digest);
 		if (likely(r <= 0))
-			return r;
+			goto out;
 	}
 
 	memcpy(digest, v->root_digest, v->digest_size);
@@ -332,10 +332,15 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
 	for (i = v->levels - 1; i >= 0; i--) {
 		r = verity_verify_level(v, io, block, i, false, digest);
 		if (unlikely(r))
-			return r;
+			goto out;
 	}
+out:
+	if (!r && v->zero_digest)
+		*is_zero = !memcmp(v->zero_digest, digest, v->digest_size);
+	else
+		*is_zero = false;
 
-	return 0;
+	return r;
 }
 
 /*
@@ -382,11 +387,19 @@ static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
 	return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
 }
 
+static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
+			  u8 *data, size_t len)
+{
+	memset(data, 0, len);
+	return 0;
+}
+
 /*
  * Verify one "dm_verity_io" structure.
  */
 static int verity_verify_io(struct dm_verity_io *io)
 {
+	bool is_zero;
 	struct dm_verity *v = io->v;
 	struct bvec_iter start;
 	unsigned b;
@@ -396,10 +409,24 @@ static int verity_verify_io(struct dm_verity_io *io)
 		struct shash_desc *desc = verity_io_hash_desc(v, io);
 
 		r = verity_hash_for_block(v, io, io->block + b,
-					  verity_io_want_digest(v, io));
+					  verity_io_want_digest(v, io),
+					  &is_zero);
 		if (unlikely(r < 0))
 			return r;
 
+		if (is_zero) {
+			/*
+			 * If we expect a zero block, don't validate, just
+			 * return zeros.
+			 */
+			r = verity_for_bv_block(v, io, &io->iter,
+						verity_bv_zero);
+			if (unlikely(r < 0))
+				return r;
+
+			continue;
+		}
+
 		r = verity_hash_init(v, desc);
 		if (unlikely(r < 0))
 			return r;
@@ -604,6 +631,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
 			args++;
 		if (verity_fec_is_enabled(v))
 			args += DM_VERITY_OPTS_FEC;
+		if (v->zero_digest)
+			args++;
 		if (!args)
 			return;
 		DMEMIT(" %u", args);
@@ -620,6 +649,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
 				BUG();
 			}
 		}
+		if (v->zero_digest)
+			DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
 		sz = verity_fec_status_table(v, sz, result, maxlen);
 		break;
 	}
@@ -671,6 +702,7 @@ static void verity_dtr(struct dm_target *ti)
 
 	kfree(v->salt);
 	kfree(v->root_digest);
+	kfree(v->zero_digest);
 
 	if (v->tfm)
 		crypto_free_shash(v->tfm);
@@ -688,6 +720,37 @@ static void verity_dtr(struct dm_target *ti)
 	kfree(v);
 }
 
+static int verity_alloc_zero_digest(struct dm_verity *v)
+{
+	int r = -ENOMEM;
+	struct shash_desc *desc;
+	u8 *zero_data;
+
+	/* a repeated ignore_zero_blocks must not leak the first digest */
+	if (v->zero_digest)
+		return 0;
+
+	v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
+	if (!v->zero_digest)
+		return r;
+
+	desc = kmalloc(v->shash_descsize, GFP_KERNEL);
+	if (!desc)
+		return r; /* verity_dtr will free zero_digest */
+
+	zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
+	if (!zero_data)
+		goto out;
+
+	r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits,
+			v->zero_digest);
+out:
+	kfree(desc);
+	kfree(zero_data);
+
+	return r;
+}
+
 static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
 {
 	int r;
@@ -718,6 +781,14 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
 			v->mode = DM_VERITY_MODE_RESTART;
 			continue;
 
+		} else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) {
+			r = verity_alloc_zero_digest(v);
+			if (r) {
+				ti->error = "Cannot allocate zero digest";
+				return r;
+			}
+			continue;
+
 		} else if (verity_is_fec_opt_arg(arg_name)) {
 			r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
 			if (r)
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 8e853722f6c6..fb419f422d73 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -40,6 +40,7 @@ struct dm_verity {
 	struct crypto_shash *tfm;
 	u8 *root_digest;	/* digest of the root block */
 	u8 *salt;		/* salt: its size is salt_size */
+	u8 *zero_digest;	/* digest for a zero block */
 	unsigned salt_size;
 	sector_t data_start;	/* data offset in 512-byte sectors */
 	sector_t hash_start;	/* hash start in blocks */
@@ -123,6 +124,6 @@ extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
 		       const u8 *data, size_t len, u8 *digest);
 
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
-				 sector_t block, u8 *digest);
+				 sector_t block, u8 *digest, bool *is_zero);
 
 #endif /* DM_VERITY_H */

From dc6fc413a1efb257a5947ca2412407297b92268a Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Wed, 16 Dec 2015 16:23:49 +0000
Subject: [PATCH 059/797] ANDROID: android: base-cfg: enable
 CONFIG_DM_VERITY_FEC

Bug: 21893453
Change-Id: Idd0dfe4e3e527df2eff2f0d734effc40dce294c7
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
(cherry picked from commit 9408350ed80005174918ce5147490035b2cf451b)
---
 android/configs/android-base.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index a4cffb1840d7..220ddb5304df 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -21,6 +21,7 @@ CONFIG_CGROUP_SCHED=y
 CONFIG_CP15_BARRIER_EMULATION=y
 CONFIG_DM_CRYPT=y
 CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
 CONFIG_EMBEDDED=y
 CONFIG_FB=y
 CONFIG_HIGH_RES_TIMERS=y

From 73a20ae3132fc35ba83b6c622fc60b8fc9fe7589 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Wed, 30 Mar 2016 14:10:13 -0700
Subject: [PATCH 060/797] ANDROID: dm verity fec: add sysfs attribute
 fec/corrected

Add a sysfs entry that allows user space to determine whether dm-verity
has come across correctable errors on the underlying block device.

Bug: 22655252
Bug: 27928374
Change-Id: I80547a2aa944af2fb9ffde002650482877ade31b
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
(cherry picked from commit 7911fad5f0a2cf5afc2215657219a21e6630e001)
---
 drivers/md/dm-verity-fec.c | 45 +++++++++++++++++++++++++++++++++++++-
 drivers/md/dm-verity-fec.h |  3 +++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 1cc10c4de701..ad10d6d8ed28 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -11,6 +11,7 @@
 
 #include "dm-verity-fec.h"
 #include <linux/math64.h>
+#include <linux/sysfs.h>
 
 #define DM_MSG_PREFIX	"verity-fec"
 
@@ -175,9 +176,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
 	if (r < 0 && neras)
 		DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
 			    v->data_dev->name, (unsigned long long)rsb, r);
-	else if (r > 0)
+	else if (r > 0) {
 		DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
 			     v->data_dev->name, (unsigned long long)rsb, r);
+		atomic_add_unless(&v->fec->corrected, 1, INT_MAX);
+	}
 
 	return r;
 }
@@ -548,6 +551,7 @@ unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
 void verity_fec_dtr(struct dm_verity *v)
 {
 	struct dm_verity_fec *f = v->fec;
+	struct kobject *kobj = &f->kobj_holder.kobj;
 
 	if (!verity_fec_is_enabled(v))
 		goto out;
@@ -564,6 +568,12 @@ void verity_fec_dtr(struct dm_verity *v)
 
 	if (f->dev)
 		dm_put_device(v->ti, f->dev);
+
+	if (kobj->state_initialized) {
+		kobject_put(kobj);
+		wait_for_completion(dm_get_completion_from_kobject(kobj));
+	}
+
 out:
 	kfree(f);
 	v->fec = NULL;
@@ -652,6 +662,27 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
 	return 0;
 }
 
+static ssize_t corrected_show(struct kobject *kobj, struct kobj_attribute *attr,
+			      char *buf)
+{
+	struct dm_verity_fec *f = container_of(kobj, struct dm_verity_fec,
+					       kobj_holder.kobj);
+
+	return sprintf(buf, "%d\n", atomic_read(&f->corrected));
+}
+
+static struct kobj_attribute attr_corrected = __ATTR_RO(corrected);
+
+static struct attribute *fec_attrs[] = {
+	&attr_corrected.attr,
+	NULL
+};
+
+static struct kobj_type fec_ktype = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_attrs = fec_attrs
+};
+
 /*
  * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
  */
@@ -675,8 +706,10 @@ int verity_fec_ctr_alloc(struct dm_verity *v)
  */
 int verity_fec_ctr(struct dm_verity *v)
 {
+	int r;
 	struct dm_verity_fec *f = v->fec;
 	struct dm_target *ti = v->ti;
+	struct mapped_device *md = dm_table_get_md(ti->table);
 	u64 hash_blocks;
 
 	if (!verity_fec_is_enabled(v)) {
@@ -684,6 +717,16 @@ int verity_fec_ctr(struct dm_verity *v)
 		return 0;
 	}
 
+	/* Create a kobject and sysfs attributes */
+	init_completion(&f->kobj_holder.completion);
+
+	r = kobject_init_and_add(&f->kobj_holder.kobj, &fec_ktype,
+				 &disk_to_dev(dm_disk(md))->kobj, "%s", "fec");
+	if (r) {
+		ti->error = "Cannot create kobject";
+		return r;
+	}
+
 	/*
 	 * FEC is computed over data blocks, possible metadata, and
 	 * hash blocks. In other words, FEC covers total of fec_blocks
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index 7fa0298b995e..8c4bee052a73 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -12,6 +12,7 @@
 #ifndef DM_VERITY_FEC_H
 #define DM_VERITY_FEC_H
 
+#include "dm.h"
 #include "dm-verity.h"
 #include <linux/rslib.h>
 
@@ -48,6 +49,8 @@ struct dm_verity_fec {
 	mempool_t *extra_pool;	/* mempool for extra buffers */
 	mempool_t *output_pool;	/* mempool for output */
 	struct kmem_cache *cache;	/* cache for buffers */
+	atomic_t corrected;		/* corrected errors */
+	struct dm_kobject_holder kobj_holder;	/* for sysfs attributes */
 };
 
 /* per-bio data */

From 02018626853b6dd6dee77d1e1befeaf2d6a8098e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Wed, 4 Feb 2015 13:54:48 -0800
Subject: [PATCH 061/797] cpufreq: interactive: fix policy locking

cpufreq_interactive_speedchange_task() is running as a separate kernel
thread and is calling __cpufreq_driver_target(), which requires callers
to hold policy->rwsem for writing to prevent racing with other parts of
the kernel trying to adjust the frequency, for example kernel thermal
throttling. Let's change the code to take policy->rwsem and while at it
refactor the code a bit.

This was originally 2 changes reviewed at:
	https://chromium-review.googlesource.com/246273
	https://chromium-review.googlesource.com/256120

Change-Id: Icc2d97c6c1b929acd2ee32e8c81d81fd2af778ab
Signed-off-by: Dmitry Torokhov <dtor@chromium.org>
Reviewed-by: Dylan Reid <dgreid@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Dmitry Torokhov <dtor@google.com>
---
 drivers/cpufreq/cpufreq_interactive.c | 99 ++++++++++++++++-----------
 1 file changed, 60 insertions(+), 39 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 0be66df4a6e6..5224e897d460 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -511,6 +511,58 @@ static void cpufreq_interactive_idle_end(void)
 	up_read(&pcpu->enable_sem);
 }
 
+static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy,
+						unsigned int *pmax_freq,
+						u64 *phvt, u64 *pfvt)
+{
+	struct cpufreq_interactive_cpuinfo *pcpu;
+	unsigned int max_freq = 0;
+	u64 hvt = ~0ULL, fvt = 0;
+	unsigned int i;
+
+	for_each_cpu(i, policy->cpus) {
+		pcpu = &per_cpu(cpuinfo, i);
+
+		fvt = max(fvt, pcpu->loc_floor_val_time);
+		if (pcpu->target_freq > max_freq) {
+			max_freq = pcpu->target_freq;
+			hvt = pcpu->loc_hispeed_val_time;
+		} else if (pcpu->target_freq == max_freq) {
+			hvt = min(hvt, pcpu->loc_hispeed_val_time);
+		}
+	}
+
+	*pmax_freq = max_freq;
+	*phvt = hvt;
+	*pfvt = fvt;
+}
+
+static void cpufreq_interactive_adjust_cpu(unsigned int cpu,
+					   struct cpufreq_policy *policy)
+{
+	struct cpufreq_interactive_cpuinfo *pcpu;
+	u64 hvt, fvt;
+	unsigned int max_freq;
+	int i;
+
+	cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt);
+
+	for_each_cpu(i, policy->cpus) {
+		pcpu = &per_cpu(cpuinfo, i);
+		pcpu->pol_floor_val_time = fvt;
+	}
+
+	if (max_freq != policy->cur) {
+		__cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H);
+		for_each_cpu(i, policy->cpus) {
+			pcpu = &per_cpu(cpuinfo, i);
+			pcpu->pol_hispeed_val_time = hvt;
+		}
+	}
+
+	trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur);
+}
+
 static int cpufreq_interactive_speedchange_task(void *data)
 {
 	unsigned int cpu;
@@ -539,49 +591,18 @@ static int cpufreq_interactive_speedchange_task(void *data)
 		spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
 
 		for_each_cpu(cpu, &tmp_mask) {
-			unsigned int j;
-			unsigned int max_freq = 0;
-			struct cpufreq_interactive_cpuinfo *pjcpu;
-			u64 hvt = ~0ULL, fvt = 0;
-
 			pcpu = &per_cpu(cpuinfo, cpu);
-			if (!down_read_trylock(&pcpu->enable_sem))
-				continue;
-			if (!pcpu->governor_enabled) {
+
+			down_write(&pcpu->policy->rwsem);
+
+			if (likely(down_read_trylock(&pcpu->enable_sem))) {
+				if (likely(pcpu->governor_enabled))
+					cpufreq_interactive_adjust_cpu(cpu,
+							pcpu->policy);
 				up_read(&pcpu->enable_sem);
-				continue;
 			}
 
-			for_each_cpu(j, pcpu->policy->cpus) {
-				pjcpu = &per_cpu(cpuinfo, j);
-
-				fvt = max(fvt, pjcpu->loc_floor_val_time);
-				if (pjcpu->target_freq > max_freq) {
-					max_freq = pjcpu->target_freq;
-					hvt = pjcpu->loc_hispeed_val_time;
-				} else if (pjcpu->target_freq == max_freq) {
-					hvt = min(hvt, pjcpu->loc_hispeed_val_time);
-				}
-			}
-			for_each_cpu(j, pcpu->policy->cpus) {
-				pjcpu = &per_cpu(cpuinfo, j);
-				pjcpu->pol_floor_val_time = fvt;
-			}
-
-			if (max_freq != pcpu->policy->cur) {
-				__cpufreq_driver_target(pcpu->policy,
-							max_freq,
-							CPUFREQ_RELATION_H);
-				for_each_cpu(j, pcpu->policy->cpus) {
-					pjcpu = &per_cpu(cpuinfo, j);
-					pjcpu->pol_hispeed_val_time = hvt;
-				}
-			}
-			trace_cpufreq_interactive_setspeed(cpu,
-						     pcpu->target_freq,
-						     pcpu->policy->cur);
-
-			up_read(&pcpu->enable_sem);
+			up_write(&pcpu->policy->rwsem);
 		}
 	}
 

From b2d18bdfa38e1f1d13f19aab9fc664b9dd3712b4 Mon Sep 17 00:00:00 2001
From: Daniel Kurtz <djkurtz@chromium.org>
Date: Thu, 28 May 2015 12:08:11 +0800
Subject: [PATCH 062/797] cpufreq: interactive: only apply interactive boost
 when enabled

Only apply the interactive boost when the interactive governor is
enabled.  This seems like the right thing to do.

This was originally reviewed on
	https://chromium-review.googlesource.com/273501

Change-Id: I5f4a7320683eada099f9a4253e3d6b0f03057fe8
Signed-off-by: Daniel Kurtz <djkurtz@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Dmitry Torokhov <dtor@google.com>
---
 drivers/cpufreq/cpufreq_interactive.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 5224e897d460..bd83be39f17b 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -622,9 +622,20 @@ static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunab
 
 	for_each_online_cpu(i) {
 		pcpu = &per_cpu(cpuinfo, i);
-		if (tunables != pcpu->policy->governor_data)
+
+		if (!down_read_trylock(&pcpu->enable_sem))
 			continue;
 
+		if (!pcpu->governor_enabled) {
+			up_read(&pcpu->enable_sem);
+			continue;
+		}
+
+		if (tunables != pcpu->policy->governor_data) {
+			up_read(&pcpu->enable_sem);
+			continue;
+		}
+
 		spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]);
 		if (pcpu->target_freq < tunables->hispeed_freq) {
 			pcpu->target_freq = tunables->hispeed_freq;
@@ -634,6 +645,8 @@ static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunab
 			anyboost = 1;
 		}
 		spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]);
+
+		up_read(&pcpu->enable_sem);
 	}
 
 	spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]);

From 48ad4abaff846bbe5e21a9d18b41c6f606ef84e4 Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Thu, 31 Mar 2016 13:21:09 -0700
Subject: [PATCH 063/797] android: base-cfg: Add CONFIG_INET_DIAG_DESTROY

Change-Id: I67430b05eca8fd520d2795d3db60faf2ec0fab9e
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 android/configs/android-base.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index 220ddb5304df..fa53af0c37ad 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -29,6 +29,7 @@ CONFIG_INET6_AH=y
 CONFIG_INET6_ESP=y
 CONFIG_INET6_IPCOMP=y
 CONFIG_INET=y
+CONFIG_INET_DIAG_DESTROY=y
 CONFIG_INET_ESP=y
 CONFIG_INET_XFRM_MODE_TUNNEL=y
 CONFIG_IP6_NF_FILTER=y

From 7c328c732fcb44a3eec9e8701610a526b3348e54 Mon Sep 17 00:00:00 2001
From: Mark Salyzyn <salyzyn@google.com>
Date: Thu, 28 Jan 2016 11:12:25 -0800
Subject: [PATCH 064/797] ANDROID: mmc: Add CONFIG_MMC_SIMULATE_MAX_SPEED

When CONFIG_MMC_SIMULATE_MAX_SPEED is enabled, expose max_read_speed,
max_write_speed and cache_size as module parameters (the defaults) and
as per-device sysfs controls to simulate a slow eMMC device. Default
values are 0 (off), 0 (off) and 4 MB respectively.

Signed-off-by: Mark Salyzyn <salyzyn@google.com>
Bug: 26976972
Change-Id: I342bfbd8b85f9b790e3f0e1e4e51a900ae07e05d
---
 Documentation/block/00-INDEX          |   6 +
 Documentation/block/mmc-max-speed.txt |  38 ++++
 drivers/mmc/card/Kconfig              |  12 ++
 drivers/mmc/card/block.c              | 300 ++++++++++++++++++++++++++
 drivers/mmc/card/queue.h              |   8 +
 5 files changed, 364 insertions(+)
 create mode 100644 Documentation/block/mmc-max-speed.txt

diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
index e840b47613f7..bc5148757edb 100644
--- a/Documentation/block/00-INDEX
+++ b/Documentation/block/00-INDEX
@@ -26,3 +26,9 @@ switching-sched.txt
 	- Switching I/O schedulers at runtime
 writeback_cache_control.txt
 	- Control of volatile write back caches
+mmc-max-speed.txt
+	- eMMC layer speed simulation, related to /sys/block/mmcblk*/
+          attributes:
+            max_read_speed
+            max_write_speed
+            cache_size
diff --git a/Documentation/block/mmc-max-speed.txt b/Documentation/block/mmc-max-speed.txt
new file mode 100644
index 000000000000..3f052b9fb999
--- /dev/null
+++ b/Documentation/block/mmc-max-speed.txt
@@ -0,0 +1,38 @@
+eMMC Block layer simulation speed controls in /sys/block/mmcblk*/
+===============================================
+
+Turned on with CONFIG_MMC_SIMULATE_MAX_SPEED which enables MMC device speed
+limiting. Used to test and simulate the behavior of the system when
+confronted with a slow MMC.
+
+Enables the max_read_speed, max_write_speed and cache_size attributes, and
+module parameters of the same names that set their defaults, to control the
+maximum read and write speeds (KB/second) and the simulated cache size (MB).
+
+NB: There is room for improving the algorithm for aspects tied directly to
+eMMC-specific behavior, for instance wear leveling and stalls from an
+exhausted erase pool. If similar speed simulation controls were ever needed
+for other types of block devices, we would expect aspects of their behavior
+to be modelled separately (e.g. head seek times, heat assist, shingling and
+rotational latency).
+
+/sys/block/mmcblk0/max_read_speed:
+
+Number of KB/second reads allowed to the block device. Used to test and
+simulate the behavior of the system when confronted with a slow reading MMC.
+Set to 0 or "off" to place no speed limit.
+
+/sys/block/mmcblk0/max_write_speed:
+
+Number of KB/second writes allowed to the block device. Used to test and
+simulate the behavior of the system when confronted with a slow writing MMC.
+Set to 0 or "off" to place no speed limit.
+
+/sys/block/mmcblk0/cache_size:
+
+Number of MB of high speed memory or high speed SLC cache expected on the
+eMMC device being simulated. Used to help simulate the write-back behavior
+more accurately. The assumption is the cache has no delay, but draws down
+in the background to the MLC/TLC primary store at the max_write_speed rate.
+Any write speed delays will show up when the cache is full, or when an I/O
+request to flush is issued.
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index 5562308699bc..6142ec1b9dfb 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -68,3 +68,15 @@ config MMC_TEST
 
 	  This driver is only of interest to those developing or
 	  testing a host driver. Most people should say N here.
+
+config MMC_SIMULATE_MAX_SPEED
+	bool "Turn on maximum speed control per block device"
+	depends on MMC_BLOCK
+	help
+	  Say Y here to enable MMC device speed limiting. Used to test and
+	  simulate the behavior of the system when confronted with a slow MMC.
+
+	  Enables the max_read_speed, max_write_speed (KB/second) and
+	  cache_size (MB) sysfs attributes on each MMC block device.
+
+	  If unsure, say N here.
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index f2ce13ab7ae6..c15d3f380524 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -288,6 +288,250 @@ static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr,
 	return ret;
 }
 
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+
+static int max_read_speed, max_write_speed, cache_size = 4;
+
+module_param(max_read_speed, int, S_IRUSR | S_IRGRP);
+MODULE_PARM_DESC(max_read_speed, "maximum KB/s read speed 0=off");
+module_param(max_write_speed, int, S_IRUSR | S_IRGRP);
+MODULE_PARM_DESC(max_write_speed, "maximum KB/s write speed 0=off");
+module_param(cache_size, int, S_IRUSR | S_IRGRP);
+MODULE_PARM_DESC(cache_size, "MB high speed memory or SLC cache");
+
+/*
+ * helper macros and expectations:
+ *  size    - unsigned long number of bytes
+ *  jiffies - unsigned long HZ timestamp difference
+ *  speed   - unsigned KB/s transfer rate
+ */
+#define size_and_speed_to_jiffies(size, speed) \
+		((size) * HZ / (speed) / 1024UL)
+#define jiffies_and_speed_to_size(jiffies, speed) \
+		(((speed) * (jiffies) * 1024UL) / HZ)
+#define jiffies_and_size_to_speed(jiffies, size) \
+		((size) * HZ / (jiffies) / 1024UL)
+
+/* Limits to report warning */
+/* jiffies_and_size_to_speed(10*HZ, queue_max_hw_sectors(q) * 512UL) ~ 25 */
+#define MIN_SPEED(q) 250 /* 10 times faster than a floppy disk */
+#define MAX_SPEED(q) jiffies_and_size_to_speed(1, queue_max_sectors(q) * 512UL)
+
+#define speed_valid(speed) ((speed) > 0)
+
+static const char off[] = "off\n";
+
+/* sysfs helper: "<n>KB/s" for an active limit, "off" for 0 or negative. */
+static int max_speed_show(int speed, char *buf)
+{
+	if (speed_valid(speed))
+		return scnprintf(buf, PAGE_SIZE, "%dKB/s\n", speed);
+	return scnprintf(buf, PAGE_SIZE, "%s", off);
+}
+
+static int max_speed_store(const char *buf, struct request_queue *q)
+{
+	unsigned int limit, set = 0;
+
+	if (!strncasecmp(off, buf, sizeof(off) - 2))
+		return set;
+	if (kstrtouint(buf, 0, &set) || (set > INT_MAX))
+		return -EINVAL;
+	if (set == 0)
+		return set;
+	limit = MAX_SPEED(q);
+	if (set > limit)
+		pr_warn("max speed %u ineffective above %u\n", set, limit);
+	limit = MIN_SPEED(q);
+	if (set < limit)
+		pr_warn("max speed %u painful below %u\n", set, limit);
+	return set;
+}
+
+static ssize_t max_write_speed_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int ret = max_speed_show(atomic_read(&md->queue.max_write_speed), buf);
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t max_write_speed_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int set = max_speed_store(buf, md->queue.queue);
+
+	if (set < 0) {
+		mmc_blk_put(md);
+		return set;
+	}
+
+	atomic_set(&md->queue.max_write_speed, set);
+	mmc_blk_put(md);
+	return count;
+}
+
+static const DEVICE_ATTR(max_write_speed, S_IRUGO | S_IWUSR,
+	max_write_speed_show, max_write_speed_store);
+
+static ssize_t max_read_speed_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int ret = max_speed_show(atomic_read(&md->queue.max_read_speed), buf);
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t max_read_speed_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int set = max_speed_store(buf, md->queue.queue);
+
+	if (set < 0) {
+		mmc_blk_put(md);
+		return set;
+	}
+
+	atomic_set(&md->queue.max_read_speed, set);
+	mmc_blk_put(md);
+	return count;
+}
+
+static const DEVICE_ATTR(max_read_speed, S_IRUGO | S_IWUSR,
+	max_read_speed_show, max_read_speed_store);
+
+static ssize_t cache_size_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_queue *mq = &md->queue;
+	int cache_size = atomic_read(&mq->cache_size);
+	int ret;
+
+	if (!cache_size)
+		ret = scnprintf(buf, PAGE_SIZE, off);
+	else {
+		int speed = atomic_read(&mq->max_write_speed);
+
+		if (!speed_valid(speed))
+			ret = scnprintf(buf, PAGE_SIZE, "%uMB\n", cache_size);
+		else { /* We accept race between cache_jiffies and cache_used */
+			unsigned long size = jiffies_and_speed_to_size(
+				jiffies - mq->cache_jiffies, speed);
+			long used = atomic_long_read(&mq->cache_used);
+
+			if (size >= used)
+				size = 0;
+			else
+				size = (used - size) * 100 / cache_size
+					/ 1024UL / 1024UL;
+
+			ret = scnprintf(buf, PAGE_SIZE, "%uMB %lu%% used\n",
+				cache_size, size);
+		}
+	}
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t cache_size_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct mmc_blk_data *md;
+	unsigned int set = 0;
+
+	if (strncasecmp(off, buf, sizeof(off) - 2)
+	 && (kstrtouint(buf, 0, &set) || (set > INT_MAX)))
+		return -EINVAL;
+
+	md = mmc_blk_get(dev_to_disk(dev));
+	atomic_set(&md->queue.cache_size, set);
+	mmc_blk_put(md);
+	return count;
+}
+
+static const DEVICE_ATTR(cache_size, S_IRUGO | S_IWUSR,
+	cache_size_show, cache_size_store);
+
+/* correct for write-back */
+static long mmc_blk_cache_used(struct mmc_queue *mq, unsigned long waitfor)
+{
+	long used = 0;
+	int speed = atomic_read(&mq->max_write_speed);
+
+	if (speed_valid(speed)) {
+		unsigned long size = jiffies_and_speed_to_size(
+					waitfor - mq->cache_jiffies, speed);
+		used = atomic_long_read(&mq->cache_used);
+
+		if (size >= used)
+			used = 0;
+		else
+			used -= size;
+	}
+
+	atomic_long_set(&mq->cache_used, used);
+	mq->cache_jiffies = waitfor;
+
+	return used;
+}
+
+/* Sleep so @req completes no faster than the configured max speed allows. */
+static void mmc_blk_simulate_delay(struct mmc_queue *mq, struct request *req,
+				   unsigned long waitfor)
+{
+	int max_speed;
+
+	if (!req)
+		return;
+
+	max_speed = (rq_data_dir(req) == READ)
+		? atomic_read(&mq->max_read_speed)
+		: atomic_read(&mq->max_write_speed);
+	if (speed_valid(max_speed)) {
+		unsigned long bytes = blk_rq_bytes(req);
+
+		if (rq_data_dir(req) != READ) {
+			int cache_size = atomic_read(&mq->cache_size);
+
+			if (cache_size) {
+				/* MB -> bytes in 64 bits: no signed overflow */
+				unsigned long size = min_t(u64, ULONG_MAX,
+						(u64)cache_size << 20);
+				long used = mmc_blk_cache_used(mq, waitfor);
+
+				used += bytes;
+				atomic_long_set(&mq->cache_used, used);
+				/* only what overflows the cache is throttled */
+				bytes = (used > size) ? used - size : 0;
+			}
+		}
+		waitfor += size_and_speed_to_jiffies(bytes, max_speed);
+		if (time_is_after_jiffies(waitfor)) {
+			long msecs = jiffies_to_msecs(waitfor - jiffies);
+
+			if (likely(msecs > 0))
+				msleep(msecs);
+		}
+	}
+}
+
+#else
+
+#define mmc_blk_simulate_delay(mq, req, waitfor)
+
+#endif
+
 static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
 {
 	struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
@@ -1264,6 +1508,23 @@ static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
 	if (ret)
 		ret = -EIO;
 
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	else if (atomic_read(&mq->cache_size)) {
+		long used = mmc_blk_cache_used(mq, jiffies);
+
+		if (used) {
+			int speed = atomic_read(&mq->max_write_speed);
+
+			if (speed_valid(speed)) {
+				unsigned long msecs = jiffies_to_msecs(
+					size_and_speed_to_jiffies(
+						used, speed));
+				if (msecs)
+					msleep(msecs);
+			}
+		}
+	}
+#endif
 	blk_end_request_all(req, ret);
 
 	return ret ? 0 : 1;
@@ -1943,6 +2204,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 	struct mmc_async_req *areq;
 	const u8 packed_nr = 2;
 	u8 reqs = 0;
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	unsigned long waitfor = jiffies;
+#endif
 
 	if (!rqc && !mq->mqrq_prev->req)
 		return 0;
@@ -1993,6 +2257,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			 */
 			mmc_blk_reset_success(md, type);
 
+			mmc_blk_simulate_delay(mq, rqc, waitfor);
+
 			if (mmc_packed_cmd(mq_rq->cmd_type)) {
 				ret = mmc_blk_end_packed_req(mq_rq);
 				break;
@@ -2411,6 +2677,14 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
 					card->ext_csd.boot_ro_lockable)
 				device_remove_file(disk_to_dev(md->disk),
 					&md->power_ro_lock);
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+			device_remove_file(disk_to_dev(md->disk),
+						&dev_attr_max_write_speed);
+			device_remove_file(disk_to_dev(md->disk),
+						&dev_attr_max_read_speed);
+			device_remove_file(disk_to_dev(md->disk),
+						&dev_attr_cache_size);
+#endif
 
 			del_gendisk(md->disk);
 		}
@@ -2446,6 +2720,24 @@ static int mmc_add_disk(struct mmc_blk_data *md)
 	ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
 	if (ret)
 		goto force_ro_fail;
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	atomic_set(&md->queue.max_write_speed, max_write_speed);
+	ret = device_create_file(disk_to_dev(md->disk),
+			&dev_attr_max_write_speed);
+	if (ret)
+		goto max_write_speed_fail;
+	atomic_set(&md->queue.max_read_speed, max_read_speed);
+	ret = device_create_file(disk_to_dev(md->disk),
+			&dev_attr_max_read_speed);
+	if (ret)
+		goto max_read_speed_fail;
+	atomic_set(&md->queue.cache_size, cache_size);
+	atomic_long_set(&md->queue.cache_used, 0);
+	md->queue.cache_jiffies = jiffies;
+	ret = device_create_file(disk_to_dev(md->disk), &dev_attr_cache_size);
+	if (ret)
+		goto cache_size_fail;
+#endif
 
 	if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
 	     card->ext_csd.boot_ro_lockable) {
@@ -2470,6 +2762,14 @@ static int mmc_add_disk(struct mmc_blk_data *md)
 	return ret;
 
 power_ro_lock_fail:
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	device_remove_file(disk_to_dev(md->disk), &dev_attr_cache_size);
+cache_size_fail:
+	device_remove_file(disk_to_dev(md->disk), &dev_attr_max_read_speed);
+max_read_speed_fail:
+	device_remove_file(disk_to_dev(md->disk), &dev_attr_max_write_speed);
+max_write_speed_fail:
+#endif
 	device_remove_file(disk_to_dev(md->disk), &md->force_ro);
 force_ro_fail:
 	del_gendisk(md->disk);
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index 36cddab57d77..d890d8832e21 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -58,6 +58,14 @@ struct mmc_queue {
 	struct mmc_queue_req	mqrq[2];
 	struct mmc_queue_req	*mqrq_cur;
 	struct mmc_queue_req	*mqrq_prev;
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	atomic_t max_write_speed;
+	atomic_t max_read_speed;
+	atomic_t cache_size;
+	/* i/o tracking */
+	atomic_long_t cache_used;
+	unsigned long cache_jiffies;
+#endif
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,

From 0871a32c4d1b28aa266be3f33f2068a193196ac7 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 11 Nov 2015 07:59:01 +0530
Subject: [PATCH 065/797] PM / OPP: Add debugfs support

This patch adds debugfs support to OPP layer to export OPPs and their
properties for all the devices.

This creates a top level directory: /sys/kernel/debug/opp and then
device specific directories (based on device names) inside it. For
example: 'cpu0', 'cpu1', etc..

If multiple devices share the OPP table, then the real directory is
created only for the first device. For all others, links are created to
the real directory.

Inside the device specific directory, a separate directory is created
for each OPP, and within that, one file per OPP property.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit deaa51465105a7eda19a627b10372f4f7c51a4df)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/Makefile  |   1 +
 drivers/base/power/opp/core.c    |  21 ++-
 drivers/base/power/opp/debugfs.c | 219 +++++++++++++++++++++++++++++++
 drivers/base/power/opp/opp.h     |  42 ++++++
 4 files changed, 281 insertions(+), 2 deletions(-)
 create mode 100644 drivers/base/power/opp/debugfs.c

diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile
index 33c1e18c41a4..19837ef04d8e 100644
--- a/drivers/base/power/opp/Makefile
+++ b/drivers/base/power/opp/Makefile
@@ -1,2 +1,3 @@
 ccflags-$(CONFIG_DEBUG_DRIVER)	:= -DDEBUG
 obj-y				+= core.o cpu.o
+obj-$(CONFIG_DEBUG_FS)		+= debugfs.o
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index b8e76f75073b..6aa172be6e8e 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -463,6 +463,7 @@ static void _kfree_list_dev_rcu(struct rcu_head *head)
 static void _remove_list_dev(struct device_list_opp *list_dev,
 			     struct device_opp *dev_opp)
 {
+	opp_debug_unregister(list_dev, dev_opp);
 	list_del(&list_dev->node);
 	call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
 		  _kfree_list_dev_rcu);
@@ -472,6 +473,7 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
 				      struct device_opp *dev_opp)
 {
 	struct device_list_opp *list_dev;
+	int ret;
 
 	list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL);
 	if (!list_dev)
@@ -481,6 +483,12 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
 	list_dev->dev = dev;
 	list_add_rcu(&list_dev->node, &dev_opp->dev_list);
 
+	/* Create debugfs entries for the dev_opp */
+	ret = opp_debug_register(list_dev, dev_opp);
+	if (ret)
+		dev_err(dev, "%s: Failed to register opp debugfs (%d)\n",
+			__func__, ret);
+
 	return list_dev;
 }
 
@@ -596,6 +604,7 @@ static void _opp_remove(struct device_opp *dev_opp,
 	 */
 	if (notify)
 		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+	opp_debug_remove_one(opp);
 	list_del_rcu(&opp->node);
 	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
 
@@ -673,6 +682,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 {
 	struct dev_pm_opp *opp;
 	struct list_head *head = &dev_opp->opp_list;
+	int ret;
 
 	/*
 	 * Insert new OPP in order of increasing frequency and discard if
@@ -703,6 +713,11 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 	new_opp->dev_opp = dev_opp;
 	list_add_rcu(&new_opp->node, head);
 
+	ret = opp_debug_create_one(new_opp, dev_opp);
+	if (ret)
+		dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n",
+			__func__, ret);
+
 	return 0;
 }
 
@@ -889,12 +904,14 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 
 	/* OPP to select on device suspend */
 	if (of_property_read_bool(np, "opp-suspend")) {
-		if (dev_opp->suspend_opp)
+		if (dev_opp->suspend_opp) {
 			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
 				 __func__, dev_opp->suspend_opp->rate,
 				 new_opp->rate);
-		else
+		} else {
+			new_opp->suspend = true;
 			dev_opp->suspend_opp = new_opp;
+		}
 	}
 
 	if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max)
diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c
new file mode 100644
index 000000000000..ddfe4773e922
--- /dev/null
+++ b/drivers/base/power/opp/debugfs.c
@@ -0,0 +1,219 @@
+/*
+ * Generic OPP debugfs interface
+ *
+ * Copyright (C) 2015-2016 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/limits.h>
+
+#include "opp.h"
+
+static struct dentry *rootdir;
+
+static void opp_set_dev_name(const struct device *dev, char *name)
+{
+	if (dev->parent)
+		snprintf(name, NAME_MAX, "%s-%s", dev_name(dev->parent),
+			 dev_name(dev));
+	else
+		snprintf(name, NAME_MAX, "%s", dev_name(dev));
+}
+
+void opp_debug_remove_one(struct dev_pm_opp *opp)
+{
+	debugfs_remove_recursive(opp->dentry);
+}
+
+/**
+ * opp_debug_create_one - create the debugfs directory for a single OPP
+ * @opp: OPP whose fields back the files that are created
+ * @dev_opp: device-opp whose debugfs directory is used as the parent
+ *
+ * Creates an "opp:<rate>" directory and one read-only file per OPP property
+ * inside it. opp->dentry is only set once everything exists.
+ *
+ * Return: 0 on success, -ENOMEM if any debugfs node could not be created. A
+ * partially populated directory is removed before returning the error, since
+ * opp_debug_remove_one() only knows about opp->dentry and would otherwise
+ * leave files behind that still point into @opp.
+ */
+int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
+{
+	struct dentry *pdentry = dev_opp->dentry;
+	struct dentry *d;
+	char name[25];	/* 20 chars for 64 bit value + 5 (opp:\0) */
+
+	/* Rate is unique to each OPP, use it to give opp-name */
+	snprintf(name, sizeof(name), "opp:%lu", opp->rate);
+
+	/* Create per-opp directory */
+	d = debugfs_create_dir(name, pdentry);
+	if (!d)
+		return -ENOMEM;
+
+	/* One read-only file per property; give up on the first failure */
+	if (!debugfs_create_bool("available", S_IRUGO, d, &opp->available) ||
+	    !debugfs_create_bool("dynamic", S_IRUGO, d, &opp->dynamic) ||
+	    !debugfs_create_bool("turbo", S_IRUGO, d, &opp->turbo) ||
+	    !debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend) ||
+	    !debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate) ||
+	    !debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt) ||
+	    !debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min) ||
+	    !debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max) ||
+	    !debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp) ||
+	    !debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
+				  &opp->clock_latency_ns))
+		goto remove_dir;
+
+	opp->dentry = d;
+	return 0;
+
+remove_dir:
+	debugfs_remove_recursive(d);
+	return -ENOMEM;
+}
+
+static int device_opp_debug_create_dir(struct device_list_opp *list_dev,
+				       struct device_opp *dev_opp)
+{
+	const struct device *dev = list_dev->dev;
+	struct dentry *d;
+
+	opp_set_dev_name(dev, dev_opp->dentry_name);
+
+	/* Create device specific directory */
+	d = debugfs_create_dir(dev_opp->dentry_name, rootdir);
+	if (!d) {
+		dev_err(dev, "%s: Failed to create debugfs dir\n", __func__);
+		return -ENOMEM;
+	}
+
+	list_dev->dentry = d;
+	dev_opp->dentry = d;
+
+	return 0;
+}
+
+static int device_opp_debug_create_link(struct device_list_opp *list_dev,
+					struct device_opp *dev_opp)
+{
+	const struct device *dev = list_dev->dev;
+	char name[NAME_MAX];
+	struct dentry *d;
+
+	opp_set_dev_name(list_dev->dev, name);
+
+	/* Create device specific directory link */
+	d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name);
+	if (!d) {
+		dev_err(dev, "%s: Failed to create link\n", __func__);
+		return -ENOMEM;
+	}
+
+	list_dev->dentry = d;
+
+	return 0;
+}
+
+/**
+ * opp_debug_register - add a device opp node to the debugfs 'opp' directory
+ * @list_dev: list-dev pointer for device
+ * @dev_opp: the device-opp being added
+ *
+ * Dynamically adds device specific directory in debugfs 'opp' directory. If the
+ * device-opp is shared with other devices, then links will be created for all
+ * devices except the first.
+ *
+ * Return: 0 on success, otherwise negative error.
+ */
+int opp_debug_register(struct device_list_opp *list_dev,
+		       struct device_opp *dev_opp)
+{
+	if (!rootdir) {
+		pr_debug("%s: Uninitialized rootdir\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dev_opp->dentry)
+		return device_opp_debug_create_link(list_dev, dev_opp);
+
+	return device_opp_debug_create_dir(list_dev, dev_opp);
+}
+
+static void opp_migrate_dentry(struct device_list_opp *list_dev,
+			       struct device_opp *dev_opp)
+{
+	struct device_list_opp *new_dev;
+	const struct device *dev;
+	struct dentry *dentry;
+
+	/* Look for next list-dev */
+	list_for_each_entry(new_dev, &dev_opp->dev_list, node)
+		if (new_dev != list_dev)
+			break;
+
+	/* new_dev is guaranteed to be valid here */
+	dev = new_dev->dev;
+	debugfs_remove_recursive(new_dev->dentry);
+
+	opp_set_dev_name(dev, dev_opp->dentry_name);
+
+	dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir,
+				dev_opp->dentry_name);
+	if (!dentry) {
+		dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
+			__func__, dev_name(list_dev->dev), dev_name(dev));
+		return;
+	}
+
+	new_dev->dentry = dentry;
+	dev_opp->dentry = dentry;
+}
+
+/**
+ * opp_debug_unregister - remove a device opp node from debugfs opp directory
+ * @list_dev: list-dev pointer for device
+ * @dev_opp: the device-opp being removed
+ *
+ * Dynamically removes device specific directory from debugfs 'opp' directory.
+ */
+void opp_debug_unregister(struct device_list_opp *list_dev,
+			  struct device_opp *dev_opp)
+{
+	if (list_dev->dentry == dev_opp->dentry) {
+		/* Move the real dentry object under another device */
+		if (!list_is_singular(&dev_opp->dev_list)) {
+			opp_migrate_dentry(list_dev, dev_opp);
+			goto out;
+		}
+		dev_opp->dentry = NULL;
+	}
+
+	debugfs_remove_recursive(list_dev->dentry);
+
+out:
+	list_dev->dentry = NULL;
+}
+
+static int __init opp_debug_init(void)
+{
+	/* Create /sys/kernel/debug/opp directory */
+	rootdir = debugfs_create_dir("opp", NULL);
+	if (!rootdir) {
+		pr_err("%s: Failed to create root directory\n", __func__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+core_initcall(opp_debug_init);
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 7366b2aa8997..a6bd8d2c2b47 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -17,6 +17,7 @@
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/limits.h>
 #include <linux/pm_opp.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -53,6 +54,7 @@ extern struct mutex dev_opp_list_lock;
  * @dynamic:	not-created from static DT entries.
  * @available:	true/false - marks if this OPP as available or not
  * @turbo:	true if turbo (boost) OPP
+ * @suspend:	true if suspend OPP
  * @rate:	Frequency in hertz
  * @u_volt:	Target voltage in microvolts corresponding to this OPP
  * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
@@ -63,6 +65,7 @@ extern struct mutex dev_opp_list_lock;
  * @dev_opp:	points back to the device_opp struct this opp belongs to
  * @rcu_head:	RCU callback head used for deferred freeing
  * @np:		OPP's device node.
+ * @dentry:	debugfs dentry pointer (per opp)
  *
  * This structure stores the OPP information for a given device.
  */
@@ -72,6 +75,7 @@ struct dev_pm_opp {
 	bool available;
 	bool dynamic;
 	bool turbo;
+	bool suspend;
 	unsigned long rate;
 
 	unsigned long u_volt;
@@ -84,6 +88,10 @@ struct dev_pm_opp {
 	struct rcu_head rcu_head;
 
 	struct device_node *np;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+#endif
 };
 
 /**
@@ -91,6 +99,7 @@ struct dev_pm_opp {
  * @node:	list node
  * @dev:	device to which the struct object belongs
  * @rcu_head:	RCU callback head used for deferred freeing
+ * @dentry:	debugfs dentry pointer (per device)
  *
  * This is an internal data structure maintaining the list of devices that are
  * managed by 'struct device_opp'.
@@ -99,6 +108,10 @@ struct device_list_opp {
 	struct list_head node;
 	const struct device *dev;
 	struct rcu_head rcu_head;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+#endif
 };
 
 /**
@@ -114,6 +127,8 @@ struct device_list_opp {
  * @opp_list:	list of opps
  * @np:		struct device_node pointer for opp's DT node.
  * @shared_opp: OPP is shared between multiple devices.
+ * @dentry:	debugfs dentry pointer of the real device directory (not links).
+ * @dentry_name: Name of the real dentry.
  *
  * This is an internal data structure maintaining the link to opps attached to
  * a device. This structure is not meant to be shared to users as it is
@@ -135,6 +150,11 @@ struct device_opp {
 	unsigned long clock_latency_ns_max;
 	bool shared_opp;
 	struct dev_pm_opp *suspend_opp;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+	char dentry_name[NAME_MAX];
+#endif
 };
 
 /* Routines internal to opp core */
@@ -143,4 +163,26 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
 				      struct device_opp *dev_opp);
 struct device_node *_of_get_opp_desc_node(struct device *dev);
 
+#ifdef CONFIG_DEBUG_FS
+void opp_debug_remove_one(struct dev_pm_opp *opp);
+int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp);
+int opp_debug_register(struct device_list_opp *list_dev,
+		       struct device_opp *dev_opp);
+void opp_debug_unregister(struct device_list_opp *list_dev,
+			  struct device_opp *dev_opp);
+#else
+static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {}
+
+static inline int opp_debug_create_one(struct dev_pm_opp *opp,
+				       struct device_opp *dev_opp)
+{ return 0; }
+static inline int opp_debug_register(struct device_list_opp *list_dev,
+				     struct device_opp *dev_opp)
+{ return 0; }
+
+static inline void opp_debug_unregister(struct device_list_opp *list_dev,
+					struct device_opp *dev_opp)
+{ }
+#endif		/* DEBUG_FS */
+
 #endif		/* __DRIVER_OPP_H__ */

From 10f4691214f81fad8e5ed2273ac72c1b9a7e22cf Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 11 Nov 2015 08:10:54 +0530
Subject: [PATCH 066/797] PM / OPP: Add "opp-supported-hw" binding

We may want to enable only a subset of OPPs, from the bigger list of
OPPs, based on what version of the hardware we are running on. This
would enable us to not duplicate OPP tables for every version of the
hardware we support.

To enable that, this patch defines a new property 'opp-supported-hw'. It
can support any number of hierarchy levels of the versions the hardware
follows. And based on the selected hardware versions, we can pick only
the relevant OPPs at runtime.

Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 1c4d12de2719dfdf27c6dab31e7a5641ee293c94)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/devicetree/bindings/opp/opp.txt | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index 0cb44dc21f97..d072fa0ffbd4 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -123,6 +123,26 @@ Optional properties:
 - opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
   the table should have this.
 
+- opp-supported-hw: This enables us to select only a subset of OPPs from the
+  larger OPP table, based on what version of the hardware we are running on. We
+  still can't have multiple nodes with the same opp-hz value in OPP table.
+
+  It's a user-defined array containing a hierarchy of hardware version numbers,
+  supported by the OPP. For example: a platform with hierarchy of three levels
+  of versions (A, B and C), this field should be like <X Y Z>, where X
+  corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
+  corresponds to version hierarchy C.
+
+  Each level of hierarchy is represented by a 32 bit value, and so there can be
+  only 32 different supported versions per hierarchy, i.e. 1 bit per version. A
+  value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
+  level. And a value of 0x00000000 will disable the OPP completely, and so we
+  never want that to happen.
+
+  If 32 values aren't sufficient for a version hierarchy, then that version
+  hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
+  above example, Z1 & Z2 refer to the version hierarchy Z.
+
 - status: Marks the node enabled/disabled.
 
 Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
@@ -463,3 +483,48 @@ Example 5: Multiple OPP tables
 		};
 	};
 };
+
+Example 6: opp-supported-hw
+(example: three level hierarchy of versions: cuts, substrate and process)
+
+/ {
+	cpus {
+		cpu@0 {
+			compatible = "arm,cortex-a7";
+			...
+
+			cpu-supply = <&cpu_supply>;
+			operating-points-v2 = <&cpu0_opp_table_slow>;
+		};
+	};
+
+	opp_table {
+		compatible = "operating-points-v2";
+		status = "okay";
+		opp-shared;
+
+		opp00 {
+			/*
+			 * Supports all substrate and process versions for 0xF
+			 * cuts, i.e. only first four cuts.
+			 */
+			opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF>;
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <900000 915000 925000>;
+			...
+		};
+
+		opp01 {
+			/*
+			 * Supports:
+			 * - cuts: only one, 6th cut (represented by 6th bit).
+			 * - substrate: supports 16 different substrate versions
+			 * - process: supports 9 different process versions
+			 */
+			opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0>;
+			opp-hz = /bits/ 64 <800000000>;
+			opp-microvolt = <900000 915000 925000>;
+			...
+		};
+	};
+};

From 2470ae172511e862b67f3a3eaa5586f16da30741 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 11 Nov 2015 08:10:55 +0530
Subject: [PATCH 067/797] PM / OPP: Add {opp-microvolt|opp-microamp}-<name>
 binding

Depending on the version of hardware or its properties, which are only
known at runtime, various properties of the OPP can change. For example,
an OPP with frequency 1.2 GHz, may have different voltage/current
requirements based on the version of the hardware it is running on.

In order to not replicate the same OPP tables for varying values of all
such fields, this commit introduces the concept of opp-property-<name>.
The <name> can be chosen by the platform at runtime, and OPPs will be
initialized depending on that name string. Currently support is extended
for the following properties:
- opp-microvolt-<name>
- opp-microamp-<name>

If the name string isn't provided by the platform, or if it is provided
but doesn't match the properties present in the OPP node, we will fall
back to the original properties without the -<name> string, if they are
available.

Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit ffdb8cc7a27c89175e541e68e2a73f1f63ab8c6b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/devicetree/bindings/opp/opp.txt | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index d072fa0ffbd4..a3e7f0d5e1fb 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -100,6 +100,14 @@ Optional properties:
   Entries for multiple regulators must be present in the same order as
   regulators are specified in device's DT node.
 
+- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
+  the above opp-microvolt property, but allows multiple voltage ranges to be
+  provided for the same OPP. At runtime, the platform can pick a <name> and
+  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
+  platform doesn't pick a specific <name> or the <name> doesn't match with any
+  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
+  present.
+
 - opp-microamp: The maximum current drawn by the device in microamperes
   considering system specific parameters (such as transients, process, aging,
   maximum operating temperature range etc.) as necessary. This may be used to
@@ -112,6 +120,9 @@ Optional properties:
   for few regulators, then this should be marked as zero for them. If it isn't
   required for any regulator, then this property need not be present.
 
+- opp-microamp-<name>: Named opp-microamp property. Similar to
+  opp-microvolt-<name> property, but for microamp instead.
+
 - clock-latency-ns: Specifies the maximum possible transition latency (in
   nanoseconds) for switching to this OPP from any other OPP.
 
@@ -528,3 +539,39 @@ Example 6: opp-supported-hw
 		};
 	};
 };
+
+Example 7: opp-microvolt-<name>, opp-microamp-<name>:
+(example: device with two possible microvolt ranges: slow and fast)
+
+/ {
+	cpus {
+		cpu@0 {
+			compatible = "arm,cortex-a7";
+			...
+
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+	};
+
+	cpu0_opp_table: opp_table0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp00 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt-slow = <900000 915000 925000>;
+			opp-microvolt-fast = <970000 975000 985000>;
+			opp-microamp-slow =  <70000>;
+			opp-microamp-fast =  <71000>;
+		};
+
+		opp01 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */
+					      <910000 925000 935000>; /* Supply vcc1 */
+			opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */
+					     <960000 965000 975000>; /* Supply vcc1 */
+			opp-microamp =  <70000>; /* Will be used for both slow/fast */
+		};
+	};
+};

From 2e86d07d460dbb7949fceffdac7845395d22b4d6 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 11 Nov 2015 08:10:56 +0530
Subject: [PATCH 068/797] PM / OPP: Remove 'operating-points-names' binding

These aren't used until now by any DT files and wouldn't be used now as
we have a better scheme in place now, i.e. opp-property-<name>
properties.

Remove the (useless) binding without breaking ABI.

Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit af87a39a5f7cf6ef252b1aec3e2e6508a40e51f1)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/devicetree/bindings/opp/opp.txt | 62 +------------------
 1 file changed, 2 insertions(+), 60 deletions(-)

diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index a3e7f0d5e1fb..24eac9a97749 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -45,21 +45,10 @@ Devices supporting OPPs must set their "operating-points-v2" property with
 phandle to a OPP table in their DT node. The OPP core will use this phandle to
 find the operating points for the device.
 
-Devices may want to choose OPP tables at runtime and so can provide a list of
-phandles here. But only *one* of them should be chosen at runtime. This must be
-accompanied by a corresponding "operating-points-names" property, to uniquely
-identify the OPP tables.
-
 If required, this can be extended for SoC vendor specfic bindings. Such bindings
 should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
 and should have a compatible description like: "operating-points-v2-<vendor>".
 
-Optional properties:
-- operating-points-names: Names of OPP tables (required if multiple OPP
-  tables are present), to uniquely identify them. The same list must be present
-  for all the CPUs which are sharing clock/voltage rails and hence the OPP
-  tables.
-
 * OPP Table Node
 
 This describes the OPPs belonging to a device. This node can have following
@@ -448,54 +437,7 @@ Example 4: Handling multiple regulators
 	};
 };
 
-Example 5: Multiple OPP tables
-
-/ {
-	cpus {
-		cpu@0 {
-			compatible = "arm,cortex-a7";
-			...
-
-			cpu-supply = <&cpu_supply>
-			operating-points-v2 = <&cpu0_opp_table_slow>, <&cpu0_opp_table_fast>;
-			operating-points-names = "slow", "fast";
-		};
-	};
-
-	cpu0_opp_table_slow: opp_table_slow {
-		compatible = "operating-points-v2";
-		status = "okay";
-		opp-shared;
-
-		opp00 {
-			opp-hz = /bits/ 64 <600000000>;
-			...
-		};
-
-		opp01 {
-			opp-hz = /bits/ 64 <800000000>;
-			...
-		};
-	};
-
-	cpu0_opp_table_fast: opp_table_fast {
-		compatible = "operating-points-v2";
-		status = "okay";
-		opp-shared;
-
-		opp10 {
-			opp-hz = /bits/ 64 <1000000000>;
-			...
-		};
-
-		opp11 {
-			opp-hz = /bits/ 64 <1100000000>;
-			...
-		};
-	};
-};
-
-Example 6: opp-supported-hw
+Example 5: opp-supported-hw
 (example: three level hierarchy of versions: cuts, substrate and process)
 
 / {
@@ -540,7 +482,7 @@ Example 6: opp-supported-hw
 	};
 };
 
-Example 7: opp-microvolt-<name>, opp-microamp-<name>:
+Example 6: opp-microvolt-<name>, opp-microamp-<name>:
 (example: device with two possible microvolt ranges: slow and fast)
 
 / {

From 068d12e6dda9460ac0be82d25b9ec2dccbf7f197 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 11 Nov 2015 08:10:57 +0530
Subject: [PATCH 069/797] PM / OPP: Rename OPP nodes as opp@<opp-hz>

It would be better to name OPP nodes as opp@<opp-hz> as that will ensure
that multiple DT nodes don't contain the same frequency. Of course we
expect the writer to name the node with its opp-hz frequency and not any
other frequency.

And that will let the compiler error out if multiple nodes are using the
same opp-hz frequency.

Suggested-by: Stephen Boyd <sboyd@codeaurora.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 754dcf35f34698661801ae1d391efa02affe83a7)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/devicetree/bindings/opp/opp.txt | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index 24eac9a97749..601256fe8c0d 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -177,20 +177,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
 		compatible = "operating-points-v2";
 		opp-shared;
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microamp = <70000>;
 			clock-latency-ns = <300000>;
 			opp-suspend;
 		};
-		opp01 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microamp = <80000>;
 			clock-latency-ns = <310000>;
 		};
-		opp02 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1025000>;
 			clock-latency-ns = <290000>;
@@ -256,20 +256,20 @@ independently.
 		 * independently.
 		 */
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microamp = <70000>;
 			clock-latency-ns = <300000>;
 			opp-suspend;
 		};
-		opp01 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microamp = <80000>;
 			clock-latency-ns = <310000>;
 		};
-		opp02 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1025000>;
 			opp-microamp = <90000;
@@ -332,20 +332,20 @@ DVFS state together.
 		compatible = "operating-points-v2";
 		opp-shared;
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>;
 			opp-microamp = <70000>;
 			clock-latency-ns = <300000>;
 			opp-suspend;
 		};
-		opp01 {
+		opp@1100000000 {
 			opp-hz = /bits/ 64 <1100000000>;
 			opp-microvolt = <980000 1000000 1010000>;
 			opp-microamp = <80000>;
 			clock-latency-ns = <310000>;
 		};
-		opp02 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <1025000>;
 			opp-microamp = <90000>;
@@ -358,20 +358,20 @@ DVFS state together.
 		compatible = "operating-points-v2";
 		opp-shared;
 
-		opp10 {
+		opp@1300000000 {
 			opp-hz = /bits/ 64 <1300000000>;
 			opp-microvolt = <1045000 1050000 1055000>;
 			opp-microamp = <95000>;
 			clock-latency-ns = <400000>;
 			opp-suspend;
 		};
-		opp11 {
+		opp@1400000000 {
 			opp-hz = /bits/ 64 <1400000000>;
 			opp-microvolt = <1075000>;
 			opp-microamp = <100000>;
 			clock-latency-ns = <400000>;
 		};
-		opp12 {
+		opp@1500000000 {
 			opp-hz = /bits/ 64 <1500000000>;
 			opp-microvolt = <1010000 1100000 1110000>;
 			opp-microamp = <95000>;
@@ -398,7 +398,7 @@ Example 4: Handling multiple regulators
 		compatible = "operating-points-v2";
 		opp-shared;
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000>, /* Supply 0 */
 					<960000>, /* Supply 1 */
@@ -411,7 +411,7 @@ Example 4: Handling multiple regulators
 
 		/* OR */
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>, /* Supply 0 */
 					<960000 965000 975000>, /* Supply 1 */
@@ -424,7 +424,7 @@ Example 4: Handling multiple regulators
 
 		/* OR */
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt = <970000 975000 985000>, /* Supply 0 */
 					<960000 965000 975000>, /* Supply 1 */
@@ -456,7 +456,7 @@ Example 5: opp-supported-hw
 		status = "okay";
 		opp-shared;
 
-		opp00 {
+		opp@600000000 {
 			/*
 			 * Supports all substrate and process versions for 0xF
 			 * cuts, i.e. only first four cuts.
@@ -467,7 +467,7 @@ Example 5: opp-supported-hw
 			...
 		};
 
-		opp01 {
+		opp@800000000 {
 			/*
 			 * Supports:
 			 * - cuts: only one, 6th cut (represented by 6th bit).
@@ -499,7 +499,7 @@ Example 6: opp-microvolt-<name>, opp-microamp-<name>:
 		compatible = "operating-points-v2";
 		opp-shared;
 
-		opp00 {
+		opp@1000000000 {
 			opp-hz = /bits/ 64 <1000000000>;
 			opp-microvolt-slow = <900000 915000 925000>;
 			opp-microvolt-fast = <970000 975000 985000>;
@@ -507,7 +507,7 @@ Example 6: opp-microvolt-<name>, opp-microamp-<name>:
 			opp-microamp-fast =  <71000>;
 		};
 
-		opp01 {
+		opp@1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */
 					      <910000 925000 935000>; /* Supply vcc1 */

From d03b18f790c87a0ab9385ba33ba4f706630cc198 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 19 Nov 2015 09:13:56 +0530
Subject: [PATCH 070/797] PM / OPP: Add missing doc comments

Few doc-style comments were missing, add them. Rearrange another one to
match the sequence within the structure.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit dc4e7b1fa20a840d2317fcfdaa1064fc09d2afcb)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/opp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index a6bd8d2c2b47..b8880c7f8be1 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -51,8 +51,8 @@ extern struct mutex dev_opp_list_lock;
  *		are protected by the dev_opp_list_lock for integrity.
  *		IMPORTANT: the opp nodes should be maintained in increasing
  *		order.
- * @dynamic:	not-created from static DT entries.
  * @available:	true/false - marks if this OPP as available or not
+ * @dynamic:	not-created from static DT entries.
  * @turbo:	true if turbo (boost) OPP
  * @suspend:	true if suspend OPP
  * @rate:	Frequency in hertz
@@ -126,7 +126,9 @@ struct device_list_opp {
  * @dev_list:	list of devices that share these OPPs
  * @opp_list:	list of opps
  * @np:		struct device_node pointer for opp's DT node.
+ * @clock_latency_ns_max: Max clock latency in nanoseconds.
  * @shared_opp: OPP is shared between multiple devices.
+ * @suspend_opp: Pointer to OPP to be used during device suspend.
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *

From ae6a227802225358803c9749a703d11741229c86 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 9 Dec 2015 08:01:46 +0530
Subject: [PATCH 071/797] PM / OPP: Parse 'opp-supported-hw' binding

OPP bindings allow a platform to enable OPPs based on the version of the
hardware they are used for.

Add support to the OPP-core to parse these bindings, by introducing
dev_pm_opp_{set|put}_supported_hw() APIs.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 7de36b0aa51a5a59e28fb2da768fa3ab07de0674)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 148 ++++++++++++++++++++++++++++++++++
 drivers/base/power/opp/opp.h  |   5 ++
 include/linux/pm_opp.h        |  13 +++
 3 files changed, 166 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 6aa172be6e8e..55cf1a99b532 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -559,6 +559,9 @@ static void _remove_device_opp(struct device_opp *dev_opp)
 	if (!list_empty(&dev_opp->opp_list))
 		return;
 
+	if (dev_opp->supported_hw)
+		return;
+
 	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
 				    node);
 
@@ -833,6 +836,145 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
 	return 0;
 }
 
+/**
+ * dev_pm_opp_set_supported_hw() - Set supported platforms
+ * @dev: Device for which supported-hw has to be set.
+ * @versions: Array of hierarchy of versions to match.
+ * @count: Number of elements in the array.
+ *
+ * This is required only for the V2 bindings, and it enables a platform to
+ * specify the hierarchy of versions it supports. OPP layer will then enable
+ * OPPs, which are available for those versions, based on its 'opp-supported-hw'
+ * property.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
+				unsigned int count)
+{
+	struct device_opp *dev_opp;
+	int ret = 0;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	/* Do we already have a version hierarchy associated with dev_opp? */
+	if (dev_opp->supported_hw) {
+		dev_err(dev, "%s: Already have supported hardware list\n",
+			__func__);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions),
+					GFP_KERNEL);
+	if (!dev_opp->supported_hw) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	dev_opp->supported_hw_count = count;
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
+
+/**
+ * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw
+ * @dev: Device for which supported-hw has to be put.
+ *
+ * This is required only for the V2 bindings, and is called for a matching
+ * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure
+ * will not be freed.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_supported_hw(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	if (!dev_opp->supported_hw) {
+		dev_err(dev, "%s: Doesn't have supported hardware list\n",
+			__func__);
+		goto unlock;
+	}
+
+	kfree(dev_opp->supported_hw);
+	dev_opp->supported_hw = NULL;
+	dev_opp->supported_hw_count = 0;
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
+
+static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
+			      struct device_node *np)
+{
+	unsigned int count = dev_opp->supported_hw_count;
+	u32 version;
+	int ret;
+
+	if (!dev_opp->supported_hw)
+		return true;
+
+	while (count--) {
+		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
+						 &version);
+		if (ret) {
+			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
+				 __func__, count, ret);
+			return false;
+		}
+
+		/* Both of these are bitwise masks of the versions */
+		if (!(version & dev_opp->supported_hw[count]))
+			return false;
+	}
+
+	return true;
+}
+
 /**
  * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
  * @dev:	device for which we do this operation
@@ -879,6 +1021,12 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 		goto free_opp;
 	}
 
+	/* Check if the OPP supports hardware's hierarchy of versions or not */
+	if (!_opp_is_supported(dev, dev_opp, np)) {
+		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
+		goto free_opp;
+	}
+
 	/*
 	 * Rate is defined as an unsigned long in clk API, and so casting
 	 * explicitly to its type. Must be fixed once rate is 64 bit
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index b8880c7f8be1..70f4564a6ab9 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -129,6 +129,8 @@ struct device_list_opp {
  * @clock_latency_ns_max: Max clock latency in nanoseconds.
  * @shared_opp: OPP is shared between multiple devices.
  * @suspend_opp: Pointer to OPP to be used during device suspend.
+ * @supported_hw: Array of version number to support.
+ * @supported_hw_count: Number of elements in supported_hw array.
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -153,6 +155,9 @@ struct device_opp {
 	bool shared_opp;
 	struct dev_pm_opp *suspend_opp;
 
+	unsigned int *supported_hw;
+	unsigned int supported_hw_count;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
 	char dentry_name[NAME_MAX];
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 9a2e50337af9..3a85110242f0 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -55,6 +55,9 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq);
 int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 
 struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
+int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
+				unsigned int count);
+void dev_pm_opp_put_supported_hw(struct device *dev);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -129,6 +132,16 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
 {
 	return ERR_PTR(-EINVAL);
 }
+
+static inline int dev_pm_opp_set_supported_hw(struct device *dev,
+					      const u32 *versions,
+					      unsigned int count)
+{
+	return -EINVAL;
+}
+
+static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)

From 6a0f8e913de021196bded343c6742fa0db34a1a5 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 9 Dec 2015 08:01:47 +0530
Subject: [PATCH 072/797] PM / OPP: Parse 'opp-<prop>-<name>' bindings

OPP bindings (for few properties) allow a platform to choose a
value/range among a set of available options. The options are present as
opp-<prop>-<name>, where the platform needs to supply the <name> string.

The OPP properties which allow such an option are: opp-microvolt and
opp-microamp.

Add support to the OPP-core to parse these bindings, by introducing
dev_pm_opp_{set|put}_prop_name() APIs.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 01fb4d3c39d35b725441e8a9a26b3f3ad67793ed)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 165 ++++++++++++++++++++++++++++++----
 drivers/base/power/opp/opp.h  |   2 +
 include/linux/pm_opp.h        |   9 ++
 3 files changed, 161 insertions(+), 15 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 55cf1a99b532..5c01fec1ed14 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -562,6 +562,9 @@ static void _remove_device_opp(struct device_opp *dev_opp)
 	if (dev_opp->supported_hw)
 		return;
 
+	if (dev_opp->prop_name)
+		return;
+
 	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
 				    node);
 
@@ -794,35 +797,48 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 }
 
 /* TODO: Support multiple regulators */
-static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
+			      struct device_opp *dev_opp)
 {
 	u32 microvolt[3] = {0};
 	u32 val;
 	int count, ret;
+	struct property *prop = NULL;
+	char name[NAME_MAX];
 
-	/* Missing property isn't a problem, but an invalid entry is */
-	if (!of_find_property(opp->np, "opp-microvolt", NULL))
-		return 0;
+	/* Search for "opp-microvolt-<name>" */
+	if (dev_opp->prop_name) {
+		sprintf(name, "opp-microvolt-%s", dev_opp->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
 
-	count = of_property_count_u32_elems(opp->np, "opp-microvolt");
+	if (!prop) {
+		/* Fall back to "opp-microvolt"; name[] may be unset here */
+		sprintf(name, "opp-microvolt");
+		prop = of_find_property(opp->np, name, NULL);
+
+		/* Missing property isn't a problem, but an invalid entry is */
+		if (!prop)
+			return 0;
+	}
+
+	count = of_property_count_u32_elems(opp->np, name);
 	if (count < 0) {
-		dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n",
-			__func__, count);
+		dev_err(dev, "%s: Invalid %s property (%d)\n",
+			__func__, name, count);
 		return count;
 	}
 
 	/* There can be one or three elements here */
 	if (count != 1 && count != 3) {
-		dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n",
-			__func__, count);
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
+			__func__, name, count);
 		return -EINVAL;
 	}
 
-	ret = of_property_read_u32_array(opp->np, "opp-microvolt", microvolt,
-					 count);
+	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
 	if (ret) {
-		dev_err(dev, "%s: error parsing opp-microvolt: %d\n", __func__,
-			ret);
+		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
 		return -EINVAL;
 	}
 
@@ -830,7 +846,20 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
 	opp->u_volt_min = microvolt[1];
 	opp->u_volt_max = microvolt[2];
 
-	if (!of_property_read_u32(opp->np, "opp-microamp", &val))
+	/* Search for "opp-microamp-<name>" */
+	prop = NULL;
+	if (dev_opp->prop_name) {
+		sprintf(name, "opp-microamp-%s", dev_opp->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Fall back to "opp-microamp"; name[] may hold a microvolt name */
+		sprintf(name, "opp-microamp");
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (prop && !of_property_read_u32(opp->np, name, &val))
 		opp->u_amp = val;
 
 	return 0;
@@ -948,6 +977,112 @@ void dev_pm_opp_put_supported_hw(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
 
+/**
+ * dev_pm_opp_set_prop_name() - Set prop-extn name
+ * @dev: Device for which the regulator has to be set.
+ * @name: name to postfix to properties.
+ *
+ * This is required only for the V2 bindings, and it enables a platform to
+ * specify the extn to be used for certain property names. The properties to
+ * which the extension will apply are opp-microvolt and opp-microamp. OPP core
+ * should postfix the property name with -<name> while looking for them.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
+{
+	struct device_opp *dev_opp;
+	int ret = 0;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	/* Do we already have a prop-name associated with dev_opp? */
+	if (dev_opp->prop_name) {
+		dev_err(dev, "%s: Already have prop-name %s\n", __func__,
+			dev_opp->prop_name);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	dev_opp->prop_name = kstrdup(name, GFP_KERNEL);
+	if (!dev_opp->prop_name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
+
+/**
+ * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name
+ * @dev: Device for which the regulator has to be set.
+ *
+ * This is required only for the V2 bindings, and is called for a matching
+ * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure
+ * will not be freed.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_prop_name(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	if (!dev_opp->prop_name) {
+		dev_err(dev, "%s: Doesn't have a prop-name\n", __func__);
+		goto unlock;
+	}
+
+	kfree(dev_opp->prop_name);
+	dev_opp->prop_name = NULL;
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
+
 static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
 			      struct device_node *np)
 {
@@ -1042,7 +1177,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	if (!of_property_read_u32(np, "clock-latency-ns", &val))
 		new_opp->clock_latency_ns = val;
 
-	ret = opp_parse_supplies(new_opp, dev);
+	ret = opp_parse_supplies(new_opp, dev, dev_opp);
 	if (ret)
 		goto free_opp;
 
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 70f4564a6ab9..690638ef36ee 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -131,6 +131,7 @@ struct device_list_opp {
  * @suspend_opp: Pointer to OPP to be used during device suspend.
  * @supported_hw: Array of version number to support.
  * @supported_hw_count: Number of elements in supported_hw array.
+ * @prop_name: A name to postfix to many DT properties, while parsing them.
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -157,6 +158,7 @@ struct device_opp {
 
 	unsigned int *supported_hw;
 	unsigned int supported_hw_count;
+	const char *prop_name;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 3a85110242f0..95403d2ccaf5 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -58,6 +58,8 @@ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
 int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 				unsigned int count);
 void dev_pm_opp_put_supported_hw(struct device *dev);
+int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
+void dev_pm_opp_put_prop_name(struct device *dev);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -142,6 +144,13 @@ static inline int dev_pm_opp_set_supported_hw(struct device *dev,
 
 static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
 
+static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
+{
+	return -EINVAL;
+}
+
+static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)

From ff7d038f3c5def395c4b06c156cca9947d4b5a02 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 17 Dec 2015 19:04:38 +0100
Subject: [PATCH 073/797] PM / OPP: Fix parsing of opp-microvolt and
 opp-microamp properties

Commit 01fb4d3c39d3 ("PM / OPP: Parse 'opp-<prop>-<name>'
bindings") broke support for parsing standard opp-microvolt and
opp-microamp properties.  Fix it by setting 'name' string to
proper value for !prop cases.

Fixes: 01fb4d3c39d3 ("PM / OPP: Parse 'opp-<prop>-<name>' bindings")
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit fd8d8e63467c922be9ae4452cca2980d473477d9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 5c01fec1ed14..cd230c63aee6 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -814,7 +814,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 
 	if (!prop) {
 		/* Search for "opp-microvolt" */
-		name[13] = '\0';
+		sprintf(name, "opp-microvolt");
 		prop = of_find_property(opp->np, name, NULL);
 
 		/* Missing property isn't a problem, but an invalid entry is */
@@ -855,7 +855,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 
 	if (!prop) {
 		/* Search for "opp-microamp" */
-		name[12] = '\0';
+		sprintf(name, "opp-microamp");
 		prop = of_find_property(opp->np, name, NULL);
 	}
 

From cb8acf6df06ef9b4a708d34e169b420e52d0c941 Mon Sep 17 00:00:00 2001
From: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
Date: Mon, 28 Dec 2015 21:06:17 +0800
Subject: [PATCH 074/797] PM / OPP: Set cpu_dev->id in cpumask first

Set cpu_dev->id in cpumask first when setting up cpumask for CPUs that
share the same OPP table. This might be helpful when handling cpumask
without the original CPU bitfield set.

Signed-off-by: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit d9de19b1cc013433ad293365b5b3902ec73dfd60)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/cpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 7b445e88a0d5..9f0c15570f64 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -214,7 +214,6 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
 /*
  * Works only for OPP v2 bindings.
  *
- * cpumask should be already set to mask of cpu_dev->id.
  * Returns -ENOENT if operating-points-v2 bindings aren't supported.
  */
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
@@ -230,6 +229,8 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask
 		return -ENOENT;
 	}
 
+	cpumask_set_cpu(cpu_dev->id, cpumask);
+
 	/* OPPs are shared ? */
 	if (!of_property_read_bool(np, "opp-shared"))
 		goto put_cpu_node;

From 59dfbb981e645f30e53a32222dd3bd478b242496 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 5 Jan 2016 16:15:54 +0530
Subject: [PATCH 075/797] PM / OPP: Use snprintf() instead of sprintf()

sprintf() can access memory outside of the range of the character array,
and is risky in some situations. The driver specified prop_name string
can be longer than NAME_MAX here (only an attacker will do that though)
and so blindly copying it into the character array of size NAME_MAX
isn't safe. Instead we must use snprintf() here.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 5ff24d601092b222340b28466e263b1c4559407e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index cd230c63aee6..cf351d3dab1c 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -808,7 +808,8 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 
 	/* Search for "opp-microvolt-<name>" */
 	if (dev_opp->prop_name) {
-		sprintf(name, "opp-microvolt-%s", dev_opp->prop_name);
+		snprintf(name, sizeof(name), "opp-microvolt-%s",
+			 dev_opp->prop_name);
 		prop = of_find_property(opp->np, name, NULL);
 	}
 
@@ -849,7 +850,8 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 	/* Search for "opp-microamp-<name>" */
 	prop = NULL;
 	if (dev_opp->prop_name) {
-		sprintf(name, "opp-microamp-%s", dev_opp->prop_name);
+		snprintf(name, sizeof(name), "opp-microamp-%s",
+			 dev_opp->prop_name);
 		prop = of_find_property(opp->np, name, NULL);
 	}
 

From 5b9202b77c8d670f23581ddebd36ce12a619d9f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 16:45:48 +0100
Subject: [PATCH 076/797] cpufreq: cpufreq-dt: avoid uninitialized variable
 warnings:

gcc warns quite a bit about values returned from allocate_resources()
in cpufreq-dt.c:

cpufreq-dt.c: In function 'cpufreq_init':
cpufreq-dt.c:327:6: error: 'cpu_dev' may be used uninitialized in this function [-Werror=maybe-uninitialized]
cpufreq-dt.c:197:17: note: 'cpu_dev' was declared here
cpufreq-dt.c:376:2: error: 'cpu_clk' may be used uninitialized in this function [-Werror=maybe-uninitialized]
cpufreq-dt.c:199:14: note: 'cpu_clk' was declared here
cpufreq-dt.c: In function 'dt_cpufreq_probe':
cpufreq-dt.c:461:2: error: 'cpu_clk' may be used uninitialized in this function [-Werror=maybe-uninitialized]
cpufreq-dt.c:447:14: note: 'cpu_clk' was declared here

The problem is that it's slightly hard for gcc to follow return
codes across PTR_ERR() calls.

This patch uses explicit assignments to the "ret" variable to make
it easier for gcc to verify that the code is actually correct,
without the need to add a bogus initialization.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit b331bc20d9281213f7fb67912638e0fb5baeb324)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 90d64081ddb3..68232fef54c9 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -141,15 +141,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 
 try_again:
 	cpu_reg = regulator_get_optional(cpu_dev, reg);
-	if (IS_ERR(cpu_reg)) {
+	ret = PTR_ERR_OR_ZERO(cpu_reg);
+	if (ret) {
 		/*
 		 * If cpu's regulator supply node is present, but regulator is
 		 * not yet registered, we should try defering probe.
 		 */
-		if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+		if (ret == -EPROBE_DEFER) {
 			dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
 				cpu);
-			return -EPROBE_DEFER;
+			return ret;
 		}
 
 		/* Try with "cpu-supply" */
@@ -158,18 +159,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 			goto try_again;
 		}
 
-		dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-			cpu, PTR_ERR(cpu_reg));
+		dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
 	}
 
 	cpu_clk = clk_get(cpu_dev, NULL);
-	if (IS_ERR(cpu_clk)) {
+	ret = PTR_ERR_OR_ZERO(cpu_clk);
+	if (ret) {
 		/* put regulator */
 		if (!IS_ERR(cpu_reg))
 			regulator_put(cpu_reg);
 
-		ret = PTR_ERR(cpu_clk);
-
 		/*
 		 * If cpu's clk node is present, but clock is not yet
 		 * registered, we should try defering probe.

From c070696dd69afe9d5432fde66270d29e36da8da2 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:33 +0530
Subject: [PATCH 077/797] PM / OPP: get/put regulators from OPP core

This allows the OPP core to request/free the regulator resource,
attached to a device OPP. The regulator device is fetched using the name
provided by the driver, while calling: dev_pm_opp_set_regulator().

This will work for both OPP-v1 and v2 bindings.

This is a preliminary step for moving the OPP switching logic into the
OPP core.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 9f8ea969d5cfdd4353d2adb004e8e2286b984369)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 111 ++++++++++++++++++++++++++++++++++
 drivers/base/power/opp/opp.h  |   4 ++
 include/linux/pm_opp.h        |   9 +++
 3 files changed, 124 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index cf351d3dab1c..1e22b71abf1e 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -19,6 +19,7 @@
 #include <linux/device.h>
 #include <linux/of.h>
 #include <linux/export.h>
+#include <linux/regulator/consumer.h>
 
 #include "opp.h"
 
@@ -565,6 +566,9 @@ static void _remove_device_opp(struct device_opp *dev_opp)
 	if (dev_opp->prop_name)
 		return;
 
+	if (!IS_ERR_OR_NULL(dev_opp->regulator))
+		return;
+
 	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
 				    node);
 
@@ -1085,6 +1089,113 @@ void dev_pm_opp_put_prop_name(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
 
+/**
+ * dev_pm_opp_set_regulator() - Set regulator name for the device
+ * @dev: Device for which regulator name is being set.
+ * @name: Name of the regulator.
+ *
+ * In order to support OPP switching, OPP layer needs to know the name of the
+ * device's regulator, as the core would be required to switch voltages as well.
+ *
+ * This must be called before any OPPs are initialized for the device.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_regulator(struct device *dev, const char *name)
+{
+	struct device_opp *dev_opp;
+	struct regulator *reg;
+	int ret;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* This should be called before OPPs are initialized */
+	if (WARN_ON(!list_empty(&dev_opp->opp_list))) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	/* Already have a regulator set */
+	if (WARN_ON(!IS_ERR_OR_NULL(dev_opp->regulator))) {
+		ret = -EBUSY;
+		goto err;
+	}
+	/* Allocate the regulator */
+	reg = regulator_get_optional(dev, name);
+	if (IS_ERR(reg)) {
+		ret = PTR_ERR(reg);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "%s: no regulator (%s) found: %d\n",
+				__func__, name, ret);
+		goto err;
+	}
+
+	dev_opp->regulator = reg;
+
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
+
+/**
+ * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
+ * @dev: Device for which regulator was set.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_regulator(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	if (IS_ERR_OR_NULL(dev_opp->regulator)) {
+		dev_err(dev, "%s: Doesn't have regulator set\n", __func__);
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	regulator_put(dev_opp->regulator);
+	dev_opp->regulator = ERR_PTR(-EINVAL);
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
+
 static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
 			      struct device_node *np)
 {
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 690638ef36ee..416293b7da23 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -22,6 +22,8 @@
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 
+struct regulator;
+
 /* Lock to allow exclusive modification to the device and opp lists */
 extern struct mutex dev_opp_list_lock;
 
@@ -132,6 +134,7 @@ struct device_list_opp {
  * @supported_hw: Array of version number to support.
  * @supported_hw_count: Number of elements in supported_hw array.
  * @prop_name: A name to postfix to many DT properties, while parsing them.
+ * @regulator: Supply regulator
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -159,6 +162,7 @@ struct device_opp {
 	unsigned int *supported_hw;
 	unsigned int supported_hw_count;
 	const char *prop_name;
+	struct regulator *regulator;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 95403d2ccaf5..c70a18ac9c8a 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -60,6 +60,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 void dev_pm_opp_put_supported_hw(struct device *dev);
 int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
 void dev_pm_opp_put_prop_name(struct device *dev);
+int dev_pm_opp_set_regulator(struct device *dev, const char *name);
+void dev_pm_opp_put_regulator(struct device *dev);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -151,6 +153,13 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 
 static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
 
+static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name)
+{
+	return -EINVAL;
+}
+
+static inline void dev_pm_opp_put_regulator(struct device *dev) {}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)

From c3fae6d33a5d32c439938410cbb1e89d8a70d918 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:34 +0530
Subject: [PATCH 078/797] PM / OPP: Disable OPPs that aren't supported by the
 regulator

Disable any OPPs where the connected regulator isn't able to provide the
specified voltage.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 7d34d56ef3349cd5f29cf7aab6650f3414fa81b9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 1e22b71abf1e..71545becfca1 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -687,6 +687,22 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev,
 	return opp;
 }
 
+static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
+					 struct device_opp *dev_opp)
+{
+	struct regulator *reg = dev_opp->regulator;
+
+	if (!IS_ERR(reg) &&
+	    !regulator_is_supported_voltage(reg, opp->u_volt_min,
+					    opp->u_volt_max)) {
+		pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
+			__func__, opp->u_volt_min, opp->u_volt_max);
+		return false;
+	}
+
+	return true;
+}
+
 static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 		    struct device_opp *dev_opp)
 {
@@ -728,6 +744,12 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 		dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n",
 			__func__, ret);
 
+	if (!_opp_supported_by_regulators(new_opp, dev_opp)) {
+		new_opp->available = false;
+		dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n",
+			 __func__, new_opp->rate);
+	}
+
 	return 0;
 }
 

From 525680c1a93d13d236e1eaecbdf360c32bfc6917 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:35 +0530
Subject: [PATCH 079/797] PM / OPP: Introduce dev_pm_opp_get_max_volt_latency()

In a few use cases (e.g. cpufreq), it is desirable to get the maximum
voltage latency for changing OPPs. Add support for that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 655c9df961751ce21466f6e97e8033932c27a675)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 59 +++++++++++++++++++++++++++++++++++
 include/linux/pm_opp.h        |  6 ++++
 2 files changed, 65 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 71545becfca1..ffe2406af882 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -230,6 +230,65 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
 
+/**
+ * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns the max voltage latency in nanoseconds.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *opp;
+	struct regulator *reg;
+	unsigned long latency_ns = 0;
+	unsigned long min_uV = ~0, max_uV = 0;
+	int ret;
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	reg = dev_opp->regulator;
+	if (IS_ERR_OR_NULL(reg)) {
+		/* Regulator may not be required for device */
+		if (reg)
+			dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__,
+				PTR_ERR(reg));
+		rcu_read_unlock();
+		return 0;
+	}
+
+	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+		if (!opp->available)
+			continue;
+
+		if (opp->u_volt_min < min_uV)
+			min_uV = opp->u_volt_min;
+		if (opp->u_volt_max > max_uV)
+			max_uV = opp->u_volt_max;
+	}
+
+	rcu_read_unlock();
+
+	/*
+	 * The caller needs to ensure that dev_opp (and hence the regulator)
+	 * isn't freed, while we are executing this routine.
+	 */
+	ret = regulator_set_voltage_time(reg, min_uV, max_uV);
+	if (ret > 0)
+		latency_ns = ret * 1000;
+
+	return latency_ns;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency);
+
 /**
  * dev_pm_opp_get_suspend_opp() - Get suspend opp
  * @dev:	device for which we do this operation
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index c70a18ac9c8a..5daa43058ac1 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp);
 
 int dev_pm_opp_get_opp_count(struct device *dev);
 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev);
+unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev);
 struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev);
 
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
@@ -88,6 +89,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
 	return 0;
 }
 
+static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
+{
+	return 0;
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
 {
 	return NULL;

From 2f5f3fb48d3e36e6855fb658f25ce088ac6fe277 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:36 +0530
Subject: [PATCH 080/797] PM / OPP: Introduce
 dev_pm_opp_get_max_transition_latency()

In a few use cases (e.g. cpufreq), it is desirable to get the maximum
latency for changing OPPs. Add support for that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 2174344765f472895c076d703c9cdc58215e1393)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 17 +++++++++++++++++
 include/linux/pm_opp.h        |  6 ++++++
 2 files changed, 23 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index ffe2406af882..b0f5c72f0fc3 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -289,6 +289,23 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency);
 
+/**
+ * dev_pm_opp_get_max_transition_latency() - Get max transition latency in
+ *					     nanoseconds
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns the max transition latency, in nanoseconds, to
+ * switch from one OPP to other.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev)
+{
+	return dev_pm_opp_get_max_volt_latency(dev) +
+		dev_pm_opp_get_max_clock_latency(dev);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency);
+
 /**
  * dev_pm_opp_get_suspend_opp() - Get suspend opp
  * @dev:	device for which we do this operation
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5daa43058ac1..59da3d9e11ea 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -35,6 +35,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp);
 int dev_pm_opp_get_opp_count(struct device *dev);
 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev);
 unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev);
+unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev);
 struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev);
 
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
@@ -94,6 +95,11 @@ static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 	return 0;
 }
 
+static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev)
+{
+	return 0;
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
 {
 	return NULL;

From df8ba0ff4a9638731dbe1355b4b9fbf1b26fa5a9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:37 +0530
Subject: [PATCH 081/797] PM / OPP: Parse clock-latency and voltage-tolerance
 for v1 bindings

V2 bindings have better support for clock-latency and voltage-tolerance
and don't need special care. To use callbacks, like
dev_pm_opp_get_max_{transition|volt}_latency(), irrespective of the
bindings, the core needs to know clock-latency/voltage-tolerance for the
earlier bindings.

This patch reads clock-latency/voltage-tolerance from the device node,
irrespective of the bindings (to keep it simple) and uses them only for
V1 bindings.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 50f8cfbd5897ca182d43f4caf19937153f17a604)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 20 ++++++++++++++++++++
 drivers/base/power/opp/opp.h  |  6 ++++++
 2 files changed, 26 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index b0f5c72f0fc3..4fafa733a1c7 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -582,6 +582,7 @@ static struct device_opp *_add_device_opp(struct device *dev)
 {
 	struct device_opp *dev_opp;
 	struct device_list_opp *list_dev;
+	struct device_node *np;
 
 	/* Check for existing list for 'dev' first */
 	dev_opp = _find_device_opp(dev);
@@ -604,6 +605,21 @@ static struct device_opp *_add_device_opp(struct device *dev)
 		return NULL;
 	}
 
+	/*
+	 * Only required for backward compatibility with v1 bindings, but isn't
+	 * harmful for other cases. And so we do it unconditionally.
+	 */
+	np = of_node_get(dev->of_node);
+	if (np) {
+		u32 val;
+
+		if (!of_property_read_u32(np, "clock-latency", &val))
+			dev_opp->clock_latency_ns_max = val;
+		of_property_read_u32(np, "voltage-tolerance",
+				     &dev_opp->voltage_tolerance_v1);
+		of_node_put(np);
+	}
+
 	srcu_init_notifier_head(&dev_opp->srcu_head);
 	INIT_LIST_HEAD(&dev_opp->opp_list);
 
@@ -861,6 +877,7 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 {
 	struct device_opp *dev_opp;
 	struct dev_pm_opp *new_opp;
+	unsigned long tol;
 	int ret;
 
 	/* Hold our list modification lock here */
@@ -874,7 +891,10 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 
 	/* populate the opp table */
 	new_opp->rate = freq;
+	tol = u_volt * dev_opp->voltage_tolerance_v1 / 100;
 	new_opp->u_volt = u_volt;
+	new_opp->u_volt_min = u_volt - tol;
+	new_opp->u_volt_max = u_volt + tol;
 	new_opp->available = true;
 	new_opp->dynamic = dynamic;
 
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 416293b7da23..fe44beb404ba 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -138,6 +138,8 @@ struct device_list_opp {
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
+ * @voltage_tolerance_v1: In percentage, for v1 bindings only.
+ *
  * This is an internal data structure maintaining the link to opps attached to
  * a device. This structure is not meant to be shared to users as it is
  * meant for book keeping and private to OPP library.
@@ -156,6 +158,10 @@ struct device_opp {
 
 	struct device_node *np;
 	unsigned long clock_latency_ns_max;
+
+	/* For backward compatibility with v1 bindings */
+	unsigned int voltage_tolerance_v1;
+
 	bool shared_opp;
 	struct dev_pm_opp *suspend_opp;
 

From b7737cf7aaeb35319e61acdc0166a3a06e25bd82 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:38 +0530
Subject: [PATCH 082/797] PM / OPP: Manage device clk

The OPP core now has almost everything it needs to manage a device's OPP
transitions; the only thing left is the device's clk. Get that as well.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit d54974c2513f487e9e70fbdc79c5da51c53e23da)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 15 +++++++++++++++
 drivers/base/power/opp/opp.h  |  3 +++
 2 files changed, 18 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 4fafa733a1c7..7d7749ce1ce4 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -13,6 +13,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/clk.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/slab.h>
@@ -583,6 +584,7 @@ static struct device_opp *_add_device_opp(struct device *dev)
 	struct device_opp *dev_opp;
 	struct device_list_opp *list_dev;
 	struct device_node *np;
+	int ret;
 
 	/* Check for existing list for 'dev' first */
 	dev_opp = _find_device_opp(dev);
@@ -620,6 +622,15 @@ static struct device_opp *_add_device_opp(struct device *dev)
 		of_node_put(np);
 	}
 
+	/* Find clk for the device */
+	dev_opp->clk = clk_get(dev, NULL);
+	if (IS_ERR(dev_opp->clk)) {
+		ret = PTR_ERR(dev_opp->clk);
+		if (ret != -EPROBE_DEFER)
+			dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__,
+				ret);
+	}
+
 	srcu_init_notifier_head(&dev_opp->srcu_head);
 	INIT_LIST_HEAD(&dev_opp->opp_list);
 
@@ -661,6 +672,10 @@ static void _remove_device_opp(struct device_opp *dev_opp)
 	if (!IS_ERR_OR_NULL(dev_opp->regulator))
 		return;
 
+	/* Release clk */
+	if (!IS_ERR(dev_opp->clk))
+		clk_put(dev_opp->clk);
+
 	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
 				    node);
 
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index fe44beb404ba..4f1bdfc7da03 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -22,6 +22,7 @@
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 
+struct clk;
 struct regulator;
 
 /* Lock to allow exclusive modification to the device and opp lists */
@@ -134,6 +135,7 @@ struct device_list_opp {
  * @supported_hw: Array of version number to support.
  * @supported_hw_count: Number of elements in supported_hw array.
  * @prop_name: A name to postfix to many DT properties, while parsing them.
+ * @clk: Device's clock handle
  * @regulator: Supply regulator
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
@@ -168,6 +170,7 @@ struct device_opp {
 	unsigned int *supported_hw;
 	unsigned int supported_hw_count;
 	const char *prop_name;
+	struct clk *clk;
 	struct regulator *regulator;
 
 #ifdef CONFIG_DEBUG_FS

From 6229dc0db2699e56996cb114a85eb59988e1163a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:39 +0530
Subject: [PATCH 083/797] PM / OPP: Add dev_pm_opp_set_rate()

This adds a routine, dev_pm_opp_set_rate(), responsible for configuring
power-supply and clock source for an OPP.

The OPP is found by matching against the target_freq passed to the
routine. This shall replace similar code present in most of the OPP
users and help simplify them a lot.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 6a0712f6f199e737aa5913d28ec4bd3a25de9660)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 176 ++++++++++++++++++++++++++++++++++
 include/linux/pm_opp.h        |   6 ++
 2 files changed, 182 insertions(+)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 7d7749ce1ce4..ab711c2c3e00 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -529,6 +529,182 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
+/*
+ * The caller needs to ensure that device_opp (and hence the clk) isn't freed,
+ * while clk returned here is used.
+ */
+static struct clk *_get_opp_clk(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct clk *clk;
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "%s: device opp doesn't exist\n", __func__);
+		clk = ERR_CAST(dev_opp);
+		goto unlock;
+	}
+
+	clk = dev_opp->clk;
+	if (IS_ERR(clk))
+		dev_err(dev, "%s: No clock available for the device\n",
+			__func__);
+
+unlock:
+	rcu_read_unlock();
+	return clk;
+}
+
+static int _set_opp_voltage(struct device *dev, struct regulator *reg,
+			    unsigned long u_volt, unsigned long u_volt_min,
+			    unsigned long u_volt_max)
+{
+	int ret;
+
+	/* Regulator not available for device */
+	if (IS_ERR(reg)) {
+		dev_dbg(dev, "%s: regulator not available: %ld\n", __func__,
+			PTR_ERR(reg));
+		return 0;
+	}
+
+	dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min,
+		u_volt, u_volt_max);
+
+	ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt,
+					    u_volt_max);
+	if (ret)
+		dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n",
+			__func__, u_volt_min, u_volt, u_volt_max, ret);
+
+	return ret;
+}
+
+/**
+ * dev_pm_opp_set_rate() - Configure new OPP based on frequency
+ * @dev:	 device for which we do this operation
+ * @target_freq: frequency to achieve
+ *
+ * This configures the power-supplies and clock source to the levels specified
+ * by the OPP corresponding to the target_freq.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *old_opp, *opp;
+	struct regulator *reg;
+	struct clk *clk;
+	unsigned long freq, old_freq;
+	unsigned long u_volt, u_volt_min, u_volt_max;
+	unsigned long ou_volt, ou_volt_min, ou_volt_max;
+	int ret;
+
+	if (unlikely(!target_freq)) {
+		dev_err(dev, "%s: Invalid target frequency %lu\n", __func__,
+			target_freq);
+		return -EINVAL;
+	}
+
+	clk = _get_opp_clk(dev);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	freq = clk_round_rate(clk, target_freq);
+	if ((long)freq <= 0)
+		freq = target_freq;
+
+	old_freq = clk_get_rate(clk);
+
+	/* Return early if nothing to do */
+	if (old_freq == freq) {
+		dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+			__func__, freq);
+		return 0;
+	}
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "%s: device opp doesn't exist\n", __func__);
+		rcu_read_unlock();
+		return PTR_ERR(dev_opp);
+	}
+
+	old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq);
+	if (!IS_ERR(old_opp)) {
+		ou_volt = old_opp->u_volt;
+		ou_volt_min = old_opp->u_volt_min;
+		ou_volt_max = old_opp->u_volt_max;
+	} else {
+		dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n",
+			__func__, old_freq, PTR_ERR(old_opp));
+	}
+
+	opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+	if (IS_ERR(opp)) {
+		ret = PTR_ERR(opp);
+		dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n",
+			__func__, freq, ret);
+		rcu_read_unlock();
+		return ret;
+	}
+
+	u_volt = opp->u_volt;
+	u_volt_min = opp->u_volt_min;
+	u_volt_max = opp->u_volt_max;
+
+	reg = dev_opp->regulator;
+
+	rcu_read_unlock();
+
+	/* Scaling up? Scale voltage before frequency */
+	if (freq > old_freq) {
+		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
+				       u_volt_max);
+		if (ret)
+			goto restore_voltage;
+	}
+
+	/* Change frequency */
+
+	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n",
+		__func__, old_freq, freq);
+
+	ret = clk_set_rate(clk, freq);
+	if (ret) {
+		dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
+			ret);
+		goto restore_voltage;
+	}
+
+	/* Scaling down? Scale voltage after frequency */
+	if (freq < old_freq) {
+		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
+				       u_volt_max);
+		if (ret)
+			goto restore_freq;
+	}
+
+	return 0;
+
+restore_freq:
+	if (clk_set_rate(clk, old_freq))
+		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
+			__func__, old_freq);
+restore_voltage:
+	/* This shouldn't harm even if the voltages weren't updated earlier */
+	if (!IS_ERR(old_opp))
+		_set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
+
 /* List-dev Helpers */
 static void _kfree_list_dev_rcu(struct rcu_head *head)
 {
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 59da3d9e11ea..cccaf4a29e9f 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -64,6 +64,7 @@ int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
 void dev_pm_opp_put_prop_name(struct device *dev);
 int dev_pm_opp_set_regulator(struct device *dev, const char *name);
 void dev_pm_opp_put_regulator(struct device *dev);
+int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -172,6 +173,11 @@ static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 
 static inline void dev_pm_opp_put_regulator(struct device *dev) {}
 
+static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
+{
+	return -EINVAL;
+}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)

From ed343155489aa67740862d959e71266992ddf1b5 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Thu, 11 Feb 2016 11:25:59 +0000
Subject: [PATCH 084/797] PM / OPP: Fix NULL pointer dereference crash when
 disabling OPPs

Commit 7d34d56ef334 (PM / OPP: Disable OPPs that aren't supported by
the regulator) causes a crash to happen on Tegra124 Jetson TK1 when
using the DFLL clock source for the CPU.  The DFLL manages the voltage
itself and so there is no regulator specified for the OPPs and so we
get a crash when we try to dereference the regulator pointer.  Fix
this by checking to see if the regulator IS_ERR_OR_NULL before
dereferencing it.

Fixes: 7d34d56ef334 (PM / OPP: Disable OPPs that aren't supported by the regulator)
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reported-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

(cherry picked from commit 78ecc56247f0ec2bc0cf6f2f2af69e98d99767bc)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index ab711c2c3e00..d7cd4e265766 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -975,7 +975,7 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 {
 	struct regulator *reg = dev_opp->regulator;
 
-	if (!IS_ERR(reg) &&
+	if (!IS_ERR_OR_NULL(reg) &&
 	    !regulator_is_supported_voltage(reg, opp->u_volt_min,
 					    opp->u_volt_max)) {
 		pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",

From 0ff7577f4fb24d42a34d33a0f16719b05328aa97 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 15 Feb 2016 10:21:53 +0530
Subject: [PATCH 085/797] PM / OPP: Initialize u_volt_min/max to a valid value

We left u_volt_min/max initialized to 0 when only the target voltage is
present in DT, instead of the target/min/max triplet.

This didn't go well with the regulator framework: on a few calls the
min voltage was set to the target and the max was set to 0, which
resulted in a kernel crash like the one below:

kernel BUG at ../drivers/regulator/core.c:216!

[<c0684af4>] (regulator_check_voltage) from [<c06857ac>] (regulator_set_voltage_unlocked+0x58/0x230)
[<c06857ac>] (regulator_set_voltage_unlocked) from [<c06859ac>] (regulator_set_voltage+0x28/0x54)
[<c06859ac>] (regulator_set_voltage) from [<c0775b28>] (_set_opp_voltage+0x30/0x98)
[<c0775b28>] (_set_opp_voltage) from [<c0776630>] (dev_pm_opp_set_rate+0xf0/0x28c)
[<c0776630>] (dev_pm_opp_set_rate) from [<c096f784>] (__cpufreq_driver_target+0x184/0x2b4)
[<c096f784>] (__cpufreq_driver_target) from [<c0973760>] (dbs_check_cpu+0x1b0/0x1f4)
[<c0973760>] (dbs_check_cpu) from [<c0973f30>] (cpufreq_governor_dbs+0x324/0x5c4)
[<c0973f30>] (cpufreq_governor_dbs) from [<c0970958>] (__cpufreq_governor+0xe4/0x1ec)
[<c0970958>] (__cpufreq_governor) from [<c09711e0>] (cpufreq_init_policy+0x64/0x8c)
[<c09711e0>] (cpufreq_init_policy) from [<c09718cc>] (cpufreq_online+0x2fc/0x708)
[<c09718cc>] (cpufreq_online) from [<c0765ff0>] (subsys_interface_register+0x94/0xd8)
[<c0765ff0>] (subsys_interface_register) from [<c0970530>] (cpufreq_register_driver+0x14c/0x19c)
[<c0970530>] (cpufreq_register_driver) from [<c09746dc>] (dt_cpufreq_probe+0x70/0xec)
[<c09746dc>] (dt_cpufreq_probe) from [<c076907c>] (platform_drv_probe+0x4c/0xb0)
[<c076907c>] (platform_drv_probe) from [<c07678e0>] (driver_probe_device+0x214/0x2c0)
[<c07678e0>] (driver_probe_device) from [<c0767a18>] (__driver_attach+0x8c/0x90)
[<c0767a18>] (__driver_attach) from [<c0765c2c>] (bus_for_each_dev+0x68/0x9c)
[<c0765c2c>] (bus_for_each_dev) from [<c0766d78>] (bus_add_driver+0x1a0/0x218)
[<c0766d78>] (bus_add_driver) from [<c076810c>] (driver_register+0x78/0xf8)
[<c076810c>] (driver_register) from [<c0301d74>] (do_one_initcall+0x90/0x1d8)
[<c0301d74>] (do_one_initcall) from [<c1100e14>] (kernel_init_freeable+0x15c/0x1fc)
[<c1100e14>] (kernel_init_freeable) from [<c0b27a0c>] (kernel_init+0x8/0xf0)
[<c0b27a0c>] (kernel_init) from [<c0307d78>] (ret_from_fork+0x14/0x3c)
Code: e1550004 baffffeb e3a00000 e8bd8070 (e7f001f2)

Fix that by initializing u_volt_min/max to the target voltage in such cases.

Reported-and-tested-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit c88c395f4a6485f23f81e385c79945d68bcd5c5d)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index d7cd4e265766..19fd7e7a3969 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1157,8 +1157,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 	}
 
 	opp->u_volt = microvolt[0];
-	opp->u_volt_min = microvolt[1];
-	opp->u_volt_max = microvolt[2];
+
+	if (count == 1) {
+		opp->u_volt_min = opp->u_volt;
+		opp->u_volt_max = opp->u_volt;
+	} else {
+		opp->u_volt_min = microvolt[1];
+		opp->u_volt_max = microvolt[2];
+	}
 
 	/* Search for "opp-microamp-<name>" */
 	prop = NULL;

From da978b6ea868b3b2e15025349be34282940039d9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 15 Feb 2016 21:56:42 +0530
Subject: [PATCH 086/797] PM / OPP: Initialize regulator pointer to an error
 value

We are currently required to do two checks on the regulator pointer:
IS_ERR() and a NULL check.

Multiple instances have been reported where these two checks were not
used consistently, resulting in crashes.

Fix that by initializing regulator pointer with an error value and
checking it only against an error.

This makes code more consistent and more efficient.

Fixes: 7d34d56ef334 (PM / OPP: Disable OPPs that aren't supported by the regulator)
Reported-and-tested-by: Jon Hunter <jonathanh@nvidia.com>
Reported-and-tested-by: Tony Lindgren <tony@atomide.com>
Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Initialize to -ENXIO ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

(cherry picked from commit 0c717d0f9cb46259dce5272705adce64a2d646d9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 19fd7e7a3969..5fb2f061129e 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -257,7 +257,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 	}
 
 	reg = dev_opp->regulator;
-	if (IS_ERR_OR_NULL(reg)) {
+	if (IS_ERR(reg)) {
 		/* Regulator may not be required for device */
 		if (reg)
 			dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__,
@@ -798,6 +798,9 @@ static struct device_opp *_add_device_opp(struct device *dev)
 		of_node_put(np);
 	}
 
+	/* Set regulator to a non-NULL error value */
+	dev_opp->regulator = ERR_PTR(-ENXIO);
+
 	/* Find clk for the device */
 	dev_opp->clk = clk_get(dev, NULL);
 	if (IS_ERR(dev_opp->clk)) {
@@ -845,7 +848,7 @@ static void _remove_device_opp(struct device_opp *dev_opp)
 	if (dev_opp->prop_name)
 		return;
 
-	if (!IS_ERR_OR_NULL(dev_opp->regulator))
+	if (!IS_ERR(dev_opp->regulator))
 		return;
 
 	/* Release clk */
@@ -975,7 +978,7 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 {
 	struct regulator *reg = dev_opp->regulator;
 
-	if (!IS_ERR_OR_NULL(reg) &&
+	if (!IS_ERR(reg) &&
 	    !regulator_is_supported_voltage(reg, opp->u_volt_min,
 					    opp->u_volt_max)) {
 		pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
@@ -1441,7 +1444,7 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 	}
 
 	/* Already have a regulator set */
-	if (WARN_ON(!IS_ERR_OR_NULL(dev_opp->regulator))) {
+	if (WARN_ON(!IS_ERR(dev_opp->regulator))) {
 		ret = -EBUSY;
 		goto err;
 	}
@@ -1492,7 +1495,7 @@ void dev_pm_opp_put_regulator(struct device *dev)
 		goto unlock;
 	}
 
-	if (IS_ERR_OR_NULL(dev_opp->regulator)) {
+	if (IS_ERR(dev_opp->regulator)) {
 		dev_err(dev, "%s: Doesn't have regulator set\n", __func__);
 		goto unlock;
 	}
@@ -1501,7 +1504,7 @@ void dev_pm_opp_put_regulator(struct device *dev)
 	WARN_ON(!list_empty(&dev_opp->opp_list));
 
 	regulator_put(dev_opp->regulator);
-	dev_opp->regulator = ERR_PTR(-EINVAL);
+	dev_opp->regulator = ERR_PTR(-ENXIO);
 
 	/* Try freeing device_opp if this was the last blocking resource */
 	_remove_device_opp(dev_opp);

From 753a14dbcc37565faa1caf17873fda776002648d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 16 Feb 2016 14:17:52 +0530
Subject: [PATCH 087/797] PM / OPP: Fix incorrect comments

Some comments were just copy/pasted from other sections and don't match
the routines they were added for. Fix them.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit a5da64477ee79efa748df256928ec8840a2a7986)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 5fb2f061129e..bdae09c1d8eb 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1254,7 +1254,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
 
 /**
  * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw
- * @dev: Device for which supported-hw has to be set.
+ * @dev: Device for which supported-hw has to be put.
  *
  * This is required only for the V2 bindings, and is called for a matching
  * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure
@@ -1303,7 +1303,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
 
 /**
  * dev_pm_opp_set_prop_name() - Set prop-extn name
- * @dev: Device for which the regulator has to be set.
+ * @dev: Device for which the prop-name has to be set.
  * @name: name to postfix to properties.
  *
  * This is required only for the V2 bindings, and it enables a platform to
@@ -1362,7 +1362,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
 
 /**
  * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name
- * @dev: Device for which the regulator has to be set.
+ * @dev: Device for which the prop-name has to be put.
  *
  * This is required only for the V2 bindings, and is called for a matching
  * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure

From 811cd7442b982402c898e6a55798cecd76be0272 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 16 Feb 2016 14:17:53 +0530
Subject: [PATCH 088/797] PM / OPP: Rename structures for clarity

Stephen pointed out recently that a few structures always confuse him as
they aren't named properly. This patch tries to address that:

Names are updated as:
- device_opp or dev_opp -> opp_table
- dev_opp_list -> opp_tables
- dev_opp_list_lock -> opp_table_lock
- device_list_opp -> opp_device (it was never a list, but a structure)
- list_dev -> opp_dev
- And similar changes in comments and function names as well.

This also fixes checkpatch warnings that were generated with this patch.

No functional changes.

Suggested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 2c2709dc6921c5d246b686521f932c73a20f428f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/base/power/opp/core.c    | 752 ++++++++++++++++---------------
 drivers/base/power/opp/cpu.c     |  22 +-
 drivers/base/power/opp/debugfs.c |  85 ++--
 drivers/base/power/opp/opp.h     |  61 ++-
 4 files changed, 461 insertions(+), 459 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index bdae09c1d8eb..433b60092972 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -25,40 +25,40 @@
 #include "opp.h"
 
 /*
- * The root of the list of all devices. All device_opp structures branch off
- * from here, with each device_opp containing the list of opp it supports in
+ * The root of the list of all opp-tables. All opp_table structures branch off
+ * from here, with each opp_table containing the list of opps it supports in
  * various states of availability.
  */
-static LIST_HEAD(dev_opp_list);
+static LIST_HEAD(opp_tables);
 /* Lock to allow exclusive modification to the device and opp lists */
-DEFINE_MUTEX(dev_opp_list_lock);
+DEFINE_MUTEX(opp_table_lock);
 
 #define opp_rcu_lockdep_assert()					\
 do {									\
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
-				!lockdep_is_held(&dev_opp_list_lock),	\
-			   "Missing rcu_read_lock() or "		\
-			   "dev_opp_list_lock protection");		\
+			 !lockdep_is_held(&opp_table_lock),		\
+			 "Missing rcu_read_lock() or "			\
+			 "opp_table_lock protection");			\
 } while (0)
 
-static struct device_list_opp *_find_list_dev(const struct device *dev,
-					      struct device_opp *dev_opp)
+static struct opp_device *_find_opp_dev(const struct device *dev,
+					struct opp_table *opp_table)
 {
-	struct device_list_opp *list_dev;
+	struct opp_device *opp_dev;
 
-	list_for_each_entry(list_dev, &dev_opp->dev_list, node)
-		if (list_dev->dev == dev)
-			return list_dev;
+	list_for_each_entry(opp_dev, &opp_table->dev_list, node)
+		if (opp_dev->dev == dev)
+			return opp_dev;
 
 	return NULL;
 }
 
-static struct device_opp *_managed_opp(const struct device_node *np)
+static struct opp_table *_managed_opp(const struct device_node *np)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 
-	list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) {
-		if (dev_opp->np == np) {
+	list_for_each_entry_rcu(opp_table, &opp_tables, node) {
+		if (opp_table->np == np) {
 			/*
 			 * Multiple devices can point to the same OPP table and
 			 * so will have same node-pointer, np.
@@ -66,7 +66,7 @@ static struct device_opp *_managed_opp(const struct device_node *np)
 			 * But the OPPs will be considered as shared only if the
 			 * OPP table contains a "opp-shared" property.
 			 */
-			return dev_opp->shared_opp ? dev_opp : NULL;
+			return opp_table->shared_opp ? opp_table : NULL;
 		}
 	}
 
@@ -74,24 +74,24 @@ static struct device_opp *_managed_opp(const struct device_node *np)
 }
 
 /**
- * _find_device_opp() - find device_opp struct using device pointer
- * @dev:	device pointer used to lookup device OPPs
+ * _find_opp_table() - find opp_table struct using device pointer
+ * @dev:	device pointer used to lookup OPP table
  *
- * Search list of device OPPs for one containing matching device. Does a RCU
- * reader operation to grab the pointer needed.
+ * Search OPP table for one containing matching device. Does a RCU reader
+ * operation to grab the pointer needed.
  *
- * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or
+ * Return: pointer to 'struct opp_table' if found, otherwise -ENODEV or
  * -EINVAL based on type of error.
  *
  * Locking: For readers, this function must be called under rcu_read_lock().
- * device_opp is a RCU protected pointer, which means that device_opp is valid
+ * opp_table is a RCU protected pointer, which means that opp_table is valid
  * as long as we are under RCU lock.
  *
- * For Writers, this function must be called with dev_opp_list_lock held.
+ * For Writers, this function must be called with opp_table_lock held.
  */
-struct device_opp *_find_device_opp(struct device *dev)
+struct opp_table *_find_opp_table(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 
 	opp_rcu_lockdep_assert();
 
@@ -100,9 +100,9 @@ struct device_opp *_find_device_opp(struct device *dev)
 		return ERR_PTR(-EINVAL);
 	}
 
-	list_for_each_entry_rcu(dev_opp, &dev_opp_list, node)
-		if (_find_list_dev(dev, dev_opp))
-			return dev_opp;
+	list_for_each_entry_rcu(opp_table, &opp_tables, node)
+		if (_find_opp_dev(dev, opp_table))
+			return opp_table;
 
 	return ERR_PTR(-ENODEV);
 }
@@ -215,16 +215,16 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo);
  */
 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	unsigned long clock_latency_ns;
 
 	rcu_read_lock();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp))
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table))
 		clock_latency_ns = 0;
 	else
-		clock_latency_ns = dev_opp->clock_latency_ns_max;
+		clock_latency_ns = opp_table->clock_latency_ns_max;
 
 	rcu_read_unlock();
 	return clock_latency_ns;
@@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
  */
 unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *opp;
 	struct regulator *reg;
 	unsigned long latency_ns = 0;
@@ -250,13 +250,13 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 
 	rcu_read_lock();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
 		rcu_read_unlock();
 		return 0;
 	}
 
-	reg = dev_opp->regulator;
+	reg = opp_table->regulator;
 	if (IS_ERR(reg)) {
 		/* Regulator may not be required for device */
 		if (reg)
@@ -266,7 +266,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 		return 0;
 	}
 
-	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+	list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
 		if (!opp->available)
 			continue;
 
@@ -279,7 +279,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 	rcu_read_unlock();
 
 	/*
-	 * The caller needs to ensure that dev_opp (and hence the regulator)
+	 * The caller needs to ensure that opp_table (and hence the regulator)
 	 * isn't freed, while we are executing this routine.
 	 */
 	ret = regulator_set_voltage_time(reg, min_uV, max_uV);
@@ -322,21 +322,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency);
  */
 struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 
 	opp_rcu_lockdep_assert();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp) || !dev_opp->suspend_opp ||
-	    !dev_opp->suspend_opp->available)
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table) || !opp_table->suspend_opp ||
+	    !opp_table->suspend_opp->available)
 		return NULL;
 
-	return dev_opp->suspend_opp;
+	return opp_table->suspend_opp;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp);
 
 /**
- * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
+ * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table
  * @dev:	device for which we do this operation
  *
  * Return: This function returns the number of available opps if there are any,
@@ -346,21 +346,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp);
  */
 int dev_pm_opp_get_opp_count(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *temp_opp;
 	int count = 0;
 
 	rcu_read_lock();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		count = PTR_ERR(dev_opp);
-		dev_err(dev, "%s: device OPP not found (%d)\n",
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		count = PTR_ERR(opp_table);
+		dev_err(dev, "%s: OPP table not found (%d)\n",
 			__func__, count);
 		goto out_unlock;
 	}
 
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+	list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
 		if (temp_opp->available)
 			count++;
 	}
@@ -377,7 +377,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
  * @freq:		frequency to search for
  * @available:		true/false - match for available opp
  *
- * Return: Searches for exact match in the opp list and returns pointer to the
+ * Return: Searches for exact match in the opp table and returns pointer to the
  * matching opp if found, else returns ERR_PTR in case of error and should
  * be handled using IS_ERR. Error return values can be:
  * EINVAL:	for bad pointer
@@ -401,19 +401,20 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					      unsigned long freq,
 					      bool available)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	opp_rcu_lockdep_assert();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		int r = PTR_ERR(dev_opp);
-		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		int r = PTR_ERR(opp_table);
+
+		dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r);
 		return ERR_PTR(r);
 	}
 
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+	list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
 		if (temp_opp->available == available &&
 				temp_opp->rate == freq) {
 			opp = temp_opp;
@@ -449,7 +450,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
 struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					     unsigned long *freq)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	opp_rcu_lockdep_assert();
@@ -459,11 +460,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 		return ERR_PTR(-EINVAL);
 	}
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp))
-		return ERR_CAST(dev_opp);
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table))
+		return ERR_CAST(opp_table);
 
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+	list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
 		if (temp_opp->available && temp_opp->rate >= *freq) {
 			opp = temp_opp;
 			*freq = opp->rate;
@@ -499,7 +500,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					      unsigned long *freq)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	opp_rcu_lockdep_assert();
@@ -509,11 +510,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 		return ERR_PTR(-EINVAL);
 	}
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp))
-		return ERR_CAST(dev_opp);
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table))
+		return ERR_CAST(opp_table);
 
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+	list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
 		if (temp_opp->available) {
 			/* go to the next node, before choosing prev */
 			if (temp_opp->rate > *freq)
@@ -530,24 +531,24 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
 /*
- * The caller needs to ensure that device_opp (and hence the clk) isn't freed,
+ * The caller needs to ensure that opp_table (and hence the clk) isn't freed,
  * while clk returned here is used.
  */
 static struct clk *_get_opp_clk(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct clk *clk;
 
 	rcu_read_lock();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
 		dev_err(dev, "%s: device opp doesn't exist\n", __func__);
-		clk = ERR_CAST(dev_opp);
+		clk = ERR_CAST(opp_table);
 		goto unlock;
 	}
 
-	clk = dev_opp->clk;
+	clk = opp_table->clk;
 	if (IS_ERR(clk))
 		dev_err(dev, "%s: No clock available for the device\n",
 			__func__);
@@ -594,7 +595,7 @@ static int _set_opp_voltage(struct device *dev, struct regulator *reg,
  */
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *old_opp, *opp;
 	struct regulator *reg;
 	struct clk *clk;
@@ -628,11 +629,11 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 
 	rcu_read_lock();
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
 		dev_err(dev, "%s: device opp doesn't exist\n", __func__);
 		rcu_read_unlock();
-		return PTR_ERR(dev_opp);
+		return PTR_ERR(opp_table);
 	}
 
 	old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq);
@@ -658,7 +659,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 	u_volt_min = opp->u_volt_min;
 	u_volt_max = opp->u_volt_max;
 
-	reg = dev_opp->regulator;
+	reg = opp_table->regulator;
 
 	rcu_read_unlock();
 
@@ -705,81 +706,81 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
 
-/* List-dev Helpers */
-static void _kfree_list_dev_rcu(struct rcu_head *head)
+/* OPP-dev Helpers */
+static void _kfree_opp_dev_rcu(struct rcu_head *head)
 {
-	struct device_list_opp *list_dev;
+	struct opp_device *opp_dev;
 
-	list_dev = container_of(head, struct device_list_opp, rcu_head);
-	kfree_rcu(list_dev, rcu_head);
+	opp_dev = container_of(head, struct opp_device, rcu_head);
+	kfree_rcu(opp_dev, rcu_head);
 }
 
-static void _remove_list_dev(struct device_list_opp *list_dev,
-			     struct device_opp *dev_opp)
+static void _remove_opp_dev(struct opp_device *opp_dev,
+			    struct opp_table *opp_table)
 {
-	opp_debug_unregister(list_dev, dev_opp);
-	list_del(&list_dev->node);
-	call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
-		  _kfree_list_dev_rcu);
+	opp_debug_unregister(opp_dev, opp_table);
+	list_del(&opp_dev->node);
+	call_srcu(&opp_table->srcu_head.srcu, &opp_dev->rcu_head,
+		  _kfree_opp_dev_rcu);
 }
 
-struct device_list_opp *_add_list_dev(const struct device *dev,
-				      struct device_opp *dev_opp)
+struct opp_device *_add_opp_dev(const struct device *dev,
+				struct opp_table *opp_table)
 {
-	struct device_list_opp *list_dev;
+	struct opp_device *opp_dev;
 	int ret;
 
-	list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL);
-	if (!list_dev)
+	opp_dev = kzalloc(sizeof(*opp_dev), GFP_KERNEL);
+	if (!opp_dev)
 		return NULL;
 
-	/* Initialize list-dev */
-	list_dev->dev = dev;
-	list_add_rcu(&list_dev->node, &dev_opp->dev_list);
+	/* Initialize opp-dev */
+	opp_dev->dev = dev;
+	list_add_rcu(&opp_dev->node, &opp_table->dev_list);
 
-	/* Create debugfs entries for the dev_opp */
-	ret = opp_debug_register(list_dev, dev_opp);
+	/* Create debugfs entries for the opp_table */
+	ret = opp_debug_register(opp_dev, opp_table);
 	if (ret)
 		dev_err(dev, "%s: Failed to register opp debugfs (%d)\n",
 			__func__, ret);
 
-	return list_dev;
+	return opp_dev;
 }
 
 /**
- * _add_device_opp() - Find device OPP table or allocate a new one
+ * _add_opp_table() - Find OPP table or allocate a new one
  * @dev:	device for which we do this operation
  *
  * It tries to find an existing table first, if it couldn't find one, it
  * allocates a new OPP table and returns that.
  *
- * Return: valid device_opp pointer if success, else NULL.
+ * Return: valid opp_table pointer if success, else NULL.
  */
-static struct device_opp *_add_device_opp(struct device *dev)
+static struct opp_table *_add_opp_table(struct device *dev)
 {
-	struct device_opp *dev_opp;
-	struct device_list_opp *list_dev;
+	struct opp_table *opp_table;
+	struct opp_device *opp_dev;
 	struct device_node *np;
 	int ret;
 
-	/* Check for existing list for 'dev' first */
-	dev_opp = _find_device_opp(dev);
-	if (!IS_ERR(dev_opp))
-		return dev_opp;
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (!IS_ERR(opp_table))
+		return opp_table;
 
 	/*
-	 * Allocate a new device OPP table. In the infrequent case where a new
+	 * Allocate a new OPP table. In the infrequent case where a new
 	 * device is needed to be added, we pay this penalty.
 	 */
-	dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
-	if (!dev_opp)
+	opp_table = kzalloc(sizeof(*opp_table), GFP_KERNEL);
+	if (!opp_table)
 		return NULL;
 
-	INIT_LIST_HEAD(&dev_opp->dev_list);
+	INIT_LIST_HEAD(&opp_table->dev_list);
 
-	list_dev = _add_list_dev(dev, dev_opp);
-	if (!list_dev) {
-		kfree(dev_opp);
+	opp_dev = _add_opp_dev(dev, opp_table);
+	if (!opp_dev) {
+		kfree(opp_table);
 		return NULL;
 	}
 
@@ -792,79 +793,80 @@ static struct device_opp *_add_device_opp(struct device *dev)
 		u32 val;
 
 		if (!of_property_read_u32(np, "clock-latency", &val))
-			dev_opp->clock_latency_ns_max = val;
+			opp_table->clock_latency_ns_max = val;
 		of_property_read_u32(np, "voltage-tolerance",
-				     &dev_opp->voltage_tolerance_v1);
+				     &opp_table->voltage_tolerance_v1);
 		of_node_put(np);
 	}
 
 	/* Set regulator to a non-NULL error value */
-	dev_opp->regulator = ERR_PTR(-ENXIO);
+	opp_table->regulator = ERR_PTR(-ENXIO);
 
 	/* Find clk for the device */
-	dev_opp->clk = clk_get(dev, NULL);
-	if (IS_ERR(dev_opp->clk)) {
-		ret = PTR_ERR(dev_opp->clk);
+	opp_table->clk = clk_get(dev, NULL);
+	if (IS_ERR(opp_table->clk)) {
+		ret = PTR_ERR(opp_table->clk);
 		if (ret != -EPROBE_DEFER)
 			dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__,
 				ret);
 	}
 
-	srcu_init_notifier_head(&dev_opp->srcu_head);
-	INIT_LIST_HEAD(&dev_opp->opp_list);
+	srcu_init_notifier_head(&opp_table->srcu_head);
+	INIT_LIST_HEAD(&opp_table->opp_list);
 
-	/* Secure the device list modification */
-	list_add_rcu(&dev_opp->node, &dev_opp_list);
-	return dev_opp;
+	/* Secure the device table modification */
+	list_add_rcu(&opp_table->node, &opp_tables);
+	return opp_table;
 }
 
 /**
- * _kfree_device_rcu() - Free device_opp RCU handler
+ * _kfree_device_rcu() - Free opp_table RCU handler
  * @head:	RCU head
  */
 static void _kfree_device_rcu(struct rcu_head *head)
 {
-	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
+	struct opp_table *opp_table = container_of(head, struct opp_table,
+						   rcu_head);
 
-	kfree_rcu(device_opp, rcu_head);
+	kfree_rcu(opp_table, rcu_head);
 }
 
 /**
- * _remove_device_opp() - Removes a device OPP table
- * @dev_opp: device OPP table to be removed.
+ * _remove_opp_table() - Removes an OPP table
+ * @opp_table: OPP table to be removed.
  *
- * Removes/frees device OPP table it it doesn't contain any OPPs.
+ * Removes/frees OPP table if it doesn't contain any OPPs.
  */
-static void _remove_device_opp(struct device_opp *dev_opp)
+static void _remove_opp_table(struct opp_table *opp_table)
 {
-	struct device_list_opp *list_dev;
+	struct opp_device *opp_dev;
 
-	if (!list_empty(&dev_opp->opp_list))
+	if (!list_empty(&opp_table->opp_list))
 		return;
 
-	if (dev_opp->supported_hw)
+	if (opp_table->supported_hw)
 		return;
 
-	if (dev_opp->prop_name)
+	if (opp_table->prop_name)
 		return;
 
-	if (!IS_ERR(dev_opp->regulator))
+	if (!IS_ERR(opp_table->regulator))
 		return;
 
 	/* Release clk */
-	if (!IS_ERR(dev_opp->clk))
-		clk_put(dev_opp->clk);
+	if (!IS_ERR(opp_table->clk))
+		clk_put(opp_table->clk);
 
-	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
-				    node);
+	opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device,
+				   node);
 
-	_remove_list_dev(list_dev, dev_opp);
+	_remove_opp_dev(opp_dev, opp_table);
 
 	/* dev_list must be empty now */
-	WARN_ON(!list_empty(&dev_opp->dev_list));
+	WARN_ON(!list_empty(&opp_table->dev_list));
 
-	list_del_rcu(&dev_opp->node);
-	call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head,
+	list_del_rcu(&opp_table->node);
+	call_srcu(&opp_table->srcu_head.srcu, &opp_table->rcu_head,
 		  _kfree_device_rcu);
 }
 
@@ -881,17 +883,17 @@ static void _kfree_opp_rcu(struct rcu_head *head)
 
 /**
  * _opp_remove()  - Remove an OPP from a table definition
- * @dev_opp:	points back to the device_opp struct this opp belongs to
+ * @opp_table:	points back to the opp_table struct this opp belongs to
  * @opp:	pointer to the OPP to remove
  * @notify:	OPP_EVENT_REMOVE notification should be sent or not
  *
- * This function removes an opp definition from the opp list.
+ * This function removes an opp definition from the opp table.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * It is assumed that the caller holds required mutex for an RCU updater
  * strategy.
  */
-static void _opp_remove(struct device_opp *dev_opp,
+static void _opp_remove(struct opp_table *opp_table,
 			struct dev_pm_opp *opp, bool notify)
 {
 	/*
@@ -899,22 +901,23 @@ static void _opp_remove(struct device_opp *dev_opp,
 	 * frequency/voltage list.
 	 */
 	if (notify)
-		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+		srcu_notifier_call_chain(&opp_table->srcu_head,
+					 OPP_EVENT_REMOVE, opp);
 	opp_debug_remove_one(opp);
 	list_del_rcu(&opp->node);
-	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
+	call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
 
-	_remove_device_opp(dev_opp);
+	_remove_opp_table(opp_table);
 }
 
 /**
- * dev_pm_opp_remove()  - Remove an OPP from OPP list
+ * dev_pm_opp_remove()  - Remove an OPP from OPP table
  * @dev:	device for which we do this operation
  * @freq:	OPP to remove with matching 'freq'
  *
- * This function removes an opp from the opp list.
+ * This function removes an opp from the opp table.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -923,17 +926,17 @@ static void _opp_remove(struct device_opp *dev_opp,
 void dev_pm_opp_remove(struct device *dev, unsigned long freq)
 {
 	struct dev_pm_opp *opp;
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	bool found = false;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp))
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table))
 		goto unlock;
 
-	list_for_each_entry(opp, &dev_opp->opp_list, node) {
+	list_for_each_entry(opp, &opp_table->opp_list, node) {
 		if (opp->rate == freq) {
 			found = true;
 			break;
@@ -946,14 +949,14 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq)
 		goto unlock;
 	}
 
-	_opp_remove(dev_opp, opp, true);
+	_opp_remove(opp_table, opp, true);
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
 
 static struct dev_pm_opp *_allocate_opp(struct device *dev,
-					struct device_opp **dev_opp)
+					struct opp_table **opp_table)
 {
 	struct dev_pm_opp *opp;
 
@@ -964,8 +967,8 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev,
 
 	INIT_LIST_HEAD(&opp->node);
 
-	*dev_opp = _add_device_opp(dev);
-	if (!*dev_opp) {
+	*opp_table = _add_opp_table(dev);
+	if (!*opp_table) {
 		kfree(opp);
 		return NULL;
 	}
@@ -974,9 +977,9 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev,
 }
 
 static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
-					 struct device_opp *dev_opp)
+					 struct opp_table *opp_table)
 {
-	struct regulator *reg = dev_opp->regulator;
+	struct regulator *reg = opp_table->regulator;
 
 	if (!IS_ERR(reg) &&
 	    !regulator_is_supported_voltage(reg, opp->u_volt_min,
@@ -990,21 +993,21 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 }
 
 static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
-		    struct device_opp *dev_opp)
+		    struct opp_table *opp_table)
 {
 	struct dev_pm_opp *opp;
-	struct list_head *head = &dev_opp->opp_list;
+	struct list_head *head = &opp_table->opp_list;
 	int ret;
 
 	/*
 	 * Insert new OPP in order of increasing frequency and discard if
 	 * already present.
 	 *
-	 * Need to use &dev_opp->opp_list in the condition part of the 'for'
+	 * Need to use &opp_table->opp_list in the condition part of the 'for'
 	 * loop, don't replace it with head otherwise it will become an infinite
 	 * loop.
 	 */
-	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+	list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
 		if (new_opp->rate > opp->rate) {
 			head = &opp->node;
 			continue;
@@ -1022,15 +1025,15 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 			0 : -EEXIST;
 	}
 
-	new_opp->dev_opp = dev_opp;
+	new_opp->opp_table = opp_table;
 	list_add_rcu(&new_opp->node, head);
 
-	ret = opp_debug_create_one(new_opp, dev_opp);
+	ret = opp_debug_create_one(new_opp, opp_table);
 	if (ret)
 		dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n",
 			__func__, ret);
 
-	if (!_opp_supported_by_regulators(new_opp, dev_opp)) {
+	if (!_opp_supported_by_regulators(new_opp, opp_table)) {
 		new_opp->available = false;
 		dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n",
 			 __func__, new_opp->rate);
@@ -1046,14 +1049,14 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
  * @u_volt:	Voltage in uVolts for this OPP
  * @dynamic:	Dynamically added OPPs.
  *
- * This function adds an opp definition to the opp list and returns status.
+ * This function adds an opp definition to the opp table and returns status.
  * The opp is made available by default and it can be controlled using
  * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove.
  *
  * NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table
  * and freed by dev_pm_opp_of_remove_table.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1069,15 +1072,15 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 		       bool dynamic)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *new_opp;
 	unsigned long tol;
 	int ret;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	new_opp = _allocate_opp(dev, &dev_opp);
+	new_opp = _allocate_opp(dev, &opp_table);
 	if (!new_opp) {
 		ret = -ENOMEM;
 		goto unlock;
@@ -1085,36 +1088,36 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 
 	/* populate the opp table */
 	new_opp->rate = freq;
-	tol = u_volt * dev_opp->voltage_tolerance_v1 / 100;
+	tol = u_volt * opp_table->voltage_tolerance_v1 / 100;
 	new_opp->u_volt = u_volt;
 	new_opp->u_volt_min = u_volt - tol;
 	new_opp->u_volt_max = u_volt + tol;
 	new_opp->available = true;
 	new_opp->dynamic = dynamic;
 
-	ret = _opp_add(dev, new_opp, dev_opp);
+	ret = _opp_add(dev, new_opp, opp_table);
 	if (ret)
 		goto free_opp;
 
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	/*
 	 * Notify the changes in the availability of the operable
 	 * frequency/voltage list.
 	 */
-	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
+	srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
 	return 0;
 
 free_opp:
-	_opp_remove(dev_opp, new_opp, false);
+	_opp_remove(opp_table, new_opp, false);
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 	return ret;
 }
 
 /* TODO: Support multiple regulators */
 static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
-			      struct device_opp *dev_opp)
+			      struct opp_table *opp_table)
 {
 	u32 microvolt[3] = {0};
 	u32 val;
@@ -1123,9 +1126,9 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 	char name[NAME_MAX];
 
 	/* Search for "opp-microvolt-<name>" */
-	if (dev_opp->prop_name) {
+	if (opp_table->prop_name) {
 		snprintf(name, sizeof(name), "opp-microvolt-%s",
-			 dev_opp->prop_name);
+			 opp_table->prop_name);
 		prop = of_find_property(opp->np, name, NULL);
 	}
 
@@ -1171,9 +1174,9 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 
 	/* Search for "opp-microamp-<name>" */
 	prop = NULL;
-	if (dev_opp->prop_name) {
+	if (opp_table->prop_name) {
 		snprintf(name, sizeof(name), "opp-microamp-%s",
-			 dev_opp->prop_name);
+			 opp_table->prop_name);
 		prop = of_find_property(opp->np, name, NULL);
 	}
 
@@ -1200,7 +1203,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
  * OPPs, which are available for those versions, based on its 'opp-supported-hw'
  * property.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1209,44 +1212,44 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 				unsigned int count)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	int ret = 0;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _add_device_opp(dev);
-	if (!dev_opp) {
+	opp_table = _add_opp_table(dev);
+	if (!opp_table) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
 
-	/* Make sure there are no concurrent readers while updating dev_opp */
-	WARN_ON(!list_empty(&dev_opp->opp_list));
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	/* Do we already have a version hierarchy associated with dev_opp? */
-	if (dev_opp->supported_hw) {
+	/* Do we already have a version hierarchy associated with opp_table? */
+	if (opp_table->supported_hw) {
 		dev_err(dev, "%s: Already have supported hardware list\n",
 			__func__);
 		ret = -EBUSY;
 		goto err;
 	}
 
-	dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions),
+	opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions),
 					GFP_KERNEL);
-	if (!dev_opp->supported_hw) {
+	if (!opp_table->supported_hw) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	dev_opp->supported_hw_count = count;
-	mutex_unlock(&dev_opp_list_lock);
+	opp_table->supported_hw_count = count;
+	mutex_unlock(&opp_table_lock);
 	return 0;
 
 err:
-	_remove_device_opp(dev_opp);
+	_remove_opp_table(opp_table);
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	return ret;
 }
@@ -1257,10 +1260,10 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
  * @dev: Device for which supported-hw has to be put.
  *
  * This is required only for the V2 bindings, and is called for a matching
- * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure
+ * dev_pm_opp_set_supported_hw(). Until this is called, the opp_table structure
  * will not be freed.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1268,36 +1271,37 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
  */
 void dev_pm_opp_put_supported_hw(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	/* Check for existing list for 'dev' first */
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		dev_err(dev, "Failed to find opp_table: %ld\n",
+			PTR_ERR(opp_table));
 		goto unlock;
 	}
 
-	/* Make sure there are no concurrent readers while updating dev_opp */
-	WARN_ON(!list_empty(&dev_opp->opp_list));
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	if (!dev_opp->supported_hw) {
+	if (!opp_table->supported_hw) {
 		dev_err(dev, "%s: Doesn't have supported hardware list\n",
 			__func__);
 		goto unlock;
 	}
 
-	kfree(dev_opp->supported_hw);
-	dev_opp->supported_hw = NULL;
-	dev_opp->supported_hw_count = 0;
+	kfree(opp_table->supported_hw);
+	opp_table->supported_hw = NULL;
+	opp_table->supported_hw_count = 0;
 
-	/* Try freeing device_opp if this was the last blocking resource */
-	_remove_device_opp(dev_opp);
+	/* Try freeing opp_table if this was the last blocking resource */
+	_remove_opp_table(opp_table);
 
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
 
@@ -1311,7 +1315,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
  * which the extension will apply are opp-microvolt and opp-microamp. OPP core
  * should postfix the property name with -<name> while looking for them.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1319,42 +1323,42 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
  */
 int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	int ret = 0;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _add_device_opp(dev);
-	if (!dev_opp) {
+	opp_table = _add_opp_table(dev);
+	if (!opp_table) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
 
-	/* Make sure there are no concurrent readers while updating dev_opp */
-	WARN_ON(!list_empty(&dev_opp->opp_list));
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	/* Do we already have a prop-name associated with dev_opp? */
-	if (dev_opp->prop_name) {
+	/* Do we already have a prop-name associated with opp_table? */
+	if (opp_table->prop_name) {
 		dev_err(dev, "%s: Already have prop-name %s\n", __func__,
-			dev_opp->prop_name);
+			opp_table->prop_name);
 		ret = -EBUSY;
 		goto err;
 	}
 
-	dev_opp->prop_name = kstrdup(name, GFP_KERNEL);
-	if (!dev_opp->prop_name) {
+	opp_table->prop_name = kstrdup(name, GFP_KERNEL);
+	if (!opp_table->prop_name) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 	return 0;
 
 err:
-	_remove_device_opp(dev_opp);
+	_remove_opp_table(opp_table);
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	return ret;
 }
@@ -1365,10 +1369,10 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
  * @dev: Device for which the prop-name has to be put.
  *
  * This is required only for the V2 bindings, and is called for a matching
- * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure
+ * dev_pm_opp_set_prop_name(). Until this is called, the opp_table structure
  * will not be freed.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1376,34 +1380,35 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
  */
 void dev_pm_opp_put_prop_name(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	/* Check for existing list for 'dev' first */
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		dev_err(dev, "Failed to find opp_table: %ld\n",
+			PTR_ERR(opp_table));
 		goto unlock;
 	}
 
-	/* Make sure there are no concurrent readers while updating dev_opp */
-	WARN_ON(!list_empty(&dev_opp->opp_list));
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	if (!dev_opp->prop_name) {
+	if (!opp_table->prop_name) {
 		dev_err(dev, "%s: Doesn't have a prop-name\n", __func__);
 		goto unlock;
 	}
 
-	kfree(dev_opp->prop_name);
-	dev_opp->prop_name = NULL;
+	kfree(opp_table->prop_name);
+	opp_table->prop_name = NULL;
 
-	/* Try freeing device_opp if this was the last blocking resource */
-	_remove_device_opp(dev_opp);
+	/* Try freeing opp_table if this was the last blocking resource */
+	_remove_opp_table(opp_table);
 
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
 
@@ -1417,7 +1422,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
  *
  * This must be called before any OPPs are initialized for the device.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1425,26 +1430,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
  */
 int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct regulator *reg;
 	int ret;
 
-	mutex_lock(&dev_opp_list_lock);
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _add_device_opp(dev);
-	if (!dev_opp) {
+	opp_table = _add_opp_table(dev);
+	if (!opp_table) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
 
 	/* This should be called before OPPs are initialized */
-	if (WARN_ON(!list_empty(&dev_opp->opp_list))) {
+	if (WARN_ON(!list_empty(&opp_table->opp_list))) {
 		ret = -EBUSY;
 		goto err;
 	}
 
 	/* Already have a regulator set */
-	if (WARN_ON(!IS_ERR(dev_opp->regulator))) {
+	if (WARN_ON(!IS_ERR(opp_table->regulator))) {
 		ret = -EBUSY;
 		goto err;
 	}
@@ -1458,15 +1463,15 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 		goto err;
 	}
 
-	dev_opp->regulator = reg;
+	opp_table->regulator = reg;
 
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 	return 0;
 
 err:
-	_remove_device_opp(dev_opp);
+	_remove_opp_table(opp_table);
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	return ret;
 }
@@ -1476,7 +1481,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
  * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
  * @dev: Device for which regulator was set.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1484,44 +1489,45 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
  */
 void dev_pm_opp_put_regulator(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 
-	mutex_lock(&dev_opp_list_lock);
+	mutex_lock(&opp_table_lock);
 
-	/* Check for existing list for 'dev' first */
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		dev_err(dev, "Failed to find opp_table: %ld\n",
+			PTR_ERR(opp_table));
 		goto unlock;
 	}
 
-	if (IS_ERR(dev_opp->regulator)) {
+	if (IS_ERR(opp_table->regulator)) {
 		dev_err(dev, "%s: Doesn't have regulator set\n", __func__);
 		goto unlock;
 	}
 
-	/* Make sure there are no concurrent readers while updating dev_opp */
-	WARN_ON(!list_empty(&dev_opp->opp_list));
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	regulator_put(dev_opp->regulator);
-	dev_opp->regulator = ERR_PTR(-ENXIO);
+	regulator_put(opp_table->regulator);
+	opp_table->regulator = ERR_PTR(-ENXIO);
 
-	/* Try freeing device_opp if this was the last blocking resource */
-	_remove_device_opp(dev_opp);
+	/* Try freeing opp_table if this was the last blocking resource */
+	_remove_opp_table(opp_table);
 
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
 
-static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
+static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
 			      struct device_node *np)
 {
-	unsigned int count = dev_opp->supported_hw_count;
+	unsigned int count = opp_table->supported_hw_count;
 	u32 version;
 	int ret;
 
-	if (!dev_opp->supported_hw)
+	if (!opp_table->supported_hw)
 		return true;
 
 	while (count--) {
@@ -1534,7 +1540,7 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
 		}
 
 		/* Both of these are bitwise masks of the versions */
-		if (!(version & dev_opp->supported_hw[count]))
+		if (!(version & opp_table->supported_hw[count]))
 			return false;
 	}
 
@@ -1546,11 +1552,11 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
  * @dev:	device for which we do this operation
  * @np:		device node
  *
- * This function adds an opp definition to the opp list and returns status. The
+ * This function adds an opp definition to the opp table and returns status. The
  * opp can be controlled using dev_pm_opp_enable/disable functions and may be
  * removed by dev_pm_opp_remove.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1566,16 +1572,16 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
  */
 static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *new_opp;
 	u64 rate;
 	u32 val;
 	int ret;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	new_opp = _allocate_opp(dev, &dev_opp);
+	new_opp = _allocate_opp(dev, &opp_table);
 	if (!new_opp) {
 		ret = -ENOMEM;
 		goto unlock;
@@ -1588,7 +1594,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	}
 
 	/* Check if the OPP supports hardware's hierarchy of versions or not */
-	if (!_opp_is_supported(dev, dev_opp, np)) {
+	if (!_opp_is_supported(dev, opp_table, np)) {
 		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
 		goto free_opp;
 	}
@@ -1608,30 +1614,30 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	if (!of_property_read_u32(np, "clock-latency-ns", &val))
 		new_opp->clock_latency_ns = val;
 
-	ret = opp_parse_supplies(new_opp, dev, dev_opp);
+	ret = opp_parse_supplies(new_opp, dev, opp_table);
 	if (ret)
 		goto free_opp;
 
-	ret = _opp_add(dev, new_opp, dev_opp);
+	ret = _opp_add(dev, new_opp, opp_table);
 	if (ret)
 		goto free_opp;
 
 	/* OPP to select on device suspend */
 	if (of_property_read_bool(np, "opp-suspend")) {
-		if (dev_opp->suspend_opp) {
+		if (opp_table->suspend_opp) {
 			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
-				 __func__, dev_opp->suspend_opp->rate,
+				 __func__, opp_table->suspend_opp->rate,
 				 new_opp->rate);
 		} else {
 			new_opp->suspend = true;
-			dev_opp->suspend_opp = new_opp;
+			opp_table->suspend_opp = new_opp;
 		}
 	}
 
-	if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max)
-		dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns;
+	if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
+		opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
 
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
 		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
@@ -1642,13 +1648,13 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	 * Notify the changes in the availability of the operable
 	 * frequency/voltage list.
 	 */
-	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
+	srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
 	return 0;
 
 free_opp:
-	_opp_remove(dev_opp, new_opp, false);
+	_opp_remove(opp_table, new_opp, false);
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 	return ret;
 }
 
@@ -1658,11 +1664,11 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
  * @freq:	Frequency in Hz for this OPP
  * @u_volt:	Voltage in uVolts for this OPP
  *
- * This function adds an opp definition to the opp list and returns status.
+ * This function adds an opp definition to the opp table and returns status.
  * The opp is made available by default and it can be controlled using
  * dev_pm_opp_enable/disable functions.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1694,7 +1700,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add);
  * copy operation, returns 0 if no modification was done OR modification was
  * successful.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks to
  * keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1703,7 +1709,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add);
 static int _opp_set_availability(struct device *dev, unsigned long freq,
 				 bool availability_req)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
 	int r = 0;
 
@@ -1712,18 +1718,18 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 	if (!new_opp)
 		return -ENOMEM;
 
-	mutex_lock(&dev_opp_list_lock);
+	mutex_lock(&opp_table_lock);
 
-	/* Find the device_opp */
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		r = PTR_ERR(dev_opp);
+	/* Find the opp_table */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		r = PTR_ERR(opp_table);
 		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
 		goto unlock;
 	}
 
 	/* Do we have the frequency? */
-	list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) {
+	list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
 		if (tmp_opp->rate == freq) {
 			opp = tmp_opp;
 			break;
@@ -1744,21 +1750,21 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 	new_opp->available = availability_req;
 
 	list_replace_rcu(&opp->node, &new_opp->node);
-	mutex_unlock(&dev_opp_list_lock);
-	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
+	mutex_unlock(&opp_table_lock);
+	call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
 
 	/* Notify the change of the OPP availability */
 	if (availability_req)
-		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE,
-					 new_opp);
+		srcu_notifier_call_chain(&opp_table->srcu_head,
+					 OPP_EVENT_ENABLE, new_opp);
 	else
-		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE,
-					 new_opp);
+		srcu_notifier_call_chain(&opp_table->srcu_head,
+					 OPP_EVENT_DISABLE, new_opp);
 
 	return 0;
 
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 	kfree(new_opp);
 	return r;
 }
@@ -1772,7 +1778,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
  * corresponding error value. It is meant to be used for users an OPP available
  * after being temporarily made unavailable with dev_pm_opp_disable.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function indirectly uses RCU and mutex locks to keep the
  * integrity of the internal data structures. Callers should ensure that
  * this function is *NOT* called under RCU protection or in contexts where
@@ -1798,7 +1804,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
  * control by users to make this OPP not available until the circumstances are
  * right to make it available again (with a call to dev_pm_opp_enable).
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function indirectly uses RCU and mutex locks to keep the
  * integrity of the internal data structures. Callers should ensure that
  * this function is *NOT* called under RCU protection or in contexts where
@@ -1816,26 +1822,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
 
 /**
  * dev_pm_opp_get_notifier() - find notifier_head of the device with opp
- * @dev:	device pointer used to lookup device OPPs.
+ * @dev:	device pointer used to lookup OPP table.
  *
  * Return: pointer to  notifier head if found, otherwise -ENODEV or
  * -EINVAL based on type of error casted as pointer. value must be checked
  *  with IS_ERR to determine valid pointer or error result.
  *
- * Locking: This function must be called under rcu_read_lock(). dev_opp is a RCU
- * protected pointer. The reason for the same is that the opp pointer which is
- * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * Locking: This function must be called under rcu_read_lock(). opp_table is a
+ * RCU protected pointer. The reason for the same is that the opp pointer which
+ * is returned will remain valid for use with opp_get_{voltage, freq} only while
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
 struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
 {
-	struct device_opp *dev_opp = _find_device_opp(dev);
+	struct opp_table *opp_table = _find_opp_table(dev);
 
-	if (IS_ERR(dev_opp))
-		return ERR_CAST(dev_opp); /* matching type */
+	if (IS_ERR(opp_table))
+		return ERR_CAST(opp_table); /* matching type */
 
-	return &dev_opp->srcu_head;
+	return &opp_table->srcu_head;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
 
@@ -1843,11 +1849,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
 /**
  * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
  *				  entries
- * @dev:	device pointer used to lookup device OPPs.
+ * @dev:	device pointer used to lookup OPP table.
  *
  * Free OPPs created using static entries present in DT.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function indirectly uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
@@ -1855,38 +1861,38 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
  */
 void dev_pm_opp_of_remove_table(struct device *dev)
 {
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct dev_pm_opp *opp, *tmp;
 
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
 
-	/* Check for existing list for 'dev' */
-	dev_opp = _find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		int error = PTR_ERR(dev_opp);
+	/* Check for existing table for 'dev' */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		int error = PTR_ERR(opp_table);
 
 		if (error != -ENODEV)
-			WARN(1, "%s: dev_opp: %d\n",
+			WARN(1, "%s: opp_table: %d\n",
 			     IS_ERR_OR_NULL(dev) ?
 					"Invalid device" : dev_name(dev),
 			     error);
 		goto unlock;
 	}
 
-	/* Find if dev_opp manages a single device */
-	if (list_is_singular(&dev_opp->dev_list)) {
+	/* Find if opp_table manages a single device */
+	if (list_is_singular(&opp_table->dev_list)) {
 		/* Free static OPPs */
-		list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) {
+		list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
 			if (!opp->dynamic)
-				_opp_remove(dev_opp, opp, true);
+				_opp_remove(opp_table, opp, true);
 		}
 	} else {
-		_remove_list_dev(_find_list_dev(dev, dev_opp), dev_opp);
+		_remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table);
 	}
 
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
 
@@ -1907,22 +1913,22 @@ struct device_node *_of_get_opp_desc_node(struct device *dev)
 static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 {
 	struct device_node *np;
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	int ret = 0, count = 0;
 
-	mutex_lock(&dev_opp_list_lock);
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _managed_opp(opp_np);
-	if (dev_opp) {
+	opp_table = _managed_opp(opp_np);
+	if (opp_table) {
 		/* OPPs are already managed */
-		if (!_add_list_dev(dev, dev_opp))
+		if (!_add_opp_dev(dev, opp_table))
 			ret = -ENOMEM;
-		mutex_unlock(&dev_opp_list_lock);
+		mutex_unlock(&opp_table_lock);
 		return ret;
 	}
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
-	/* We have opp-list node now, iterate over it and add OPPs */
+	/* We have opp-table node now, iterate over it and add OPPs */
 	for_each_available_child_of_node(opp_np, np) {
 		count++;
 
@@ -1938,19 +1944,19 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 	if (WARN_ON(!count))
 		return -ENOENT;
 
-	mutex_lock(&dev_opp_list_lock);
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _find_device_opp(dev);
-	if (WARN_ON(IS_ERR(dev_opp))) {
-		ret = PTR_ERR(dev_opp);
-		mutex_unlock(&dev_opp_list_lock);
+	opp_table = _find_opp_table(dev);
+	if (WARN_ON(IS_ERR(opp_table))) {
+		ret = PTR_ERR(opp_table);
+		mutex_unlock(&opp_table_lock);
 		goto free_table;
 	}
 
-	dev_opp->np = opp_np;
-	dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+	opp_table->np = opp_np;
+	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
 
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	return 0;
 
@@ -1979,7 +1985,7 @@ static int _of_add_opp_table_v1(struct device *dev)
 	 */
 	nr = prop->length / sizeof(u32);
 	if (nr % 2) {
-		dev_err(dev, "%s: Invalid OPP list\n", __func__);
+		dev_err(dev, "%s: Invalid OPP table\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1999,11 +2005,11 @@ static int _of_add_opp_table_v1(struct device *dev)
 
 /**
  * dev_pm_opp_of_add_table() - Initialize opp table from device tree
- * @dev:	device pointer used to lookup device OPPs.
+ * @dev:	device pointer used to lookup OPP table.
  *
  * Register the initial OPP table with the OPP library for given device.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function indirectly uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 9f0c15570f64..ba2bdbd932ef 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -31,7 +31,7 @@
  * @table:	Cpufreq table returned back to caller
  *
  * Generate a cpufreq table for a provided device- this assumes that the
- * opp list is already initialized and ready for usage.
+ * opp table is already initialized and ready for usage.
  *
  * This function allocates required memory for the cpufreq table. It is
  * expected that the caller does the required maintenance such as freeing
@@ -44,7 +44,7 @@
  * WARNING: It is  important for the callers to ensure refreshing their copy of
  * the table if any of the mentioned functions have been invoked in the interim.
  *
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
  * Since we just use the regular accessor functions to access the internal data
  * structures, we use RCU read lock inside this function. As a result, users of
  * this function DONOT need to use explicit locks for invoking.
@@ -122,15 +122,15 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 /* Required only for V1 bindings, as v2 can manage it from DT itself */
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 {
-	struct device_list_opp *list_dev;
-	struct device_opp *dev_opp;
+	struct opp_device *opp_dev;
+	struct opp_table *opp_table;
 	struct device *dev;
 	int cpu, ret = 0;
 
-	mutex_lock(&dev_opp_list_lock);
+	mutex_lock(&opp_table_lock);
 
-	dev_opp = _find_device_opp(cpu_dev);
-	if (IS_ERR(dev_opp)) {
+	opp_table = _find_opp_table(cpu_dev);
+	if (IS_ERR(opp_table)) {
 		ret = -EINVAL;
 		goto unlock;
 	}
@@ -146,15 +146,15 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 			continue;
 		}
 
-		list_dev = _add_list_dev(dev, dev_opp);
-		if (!list_dev) {
-			dev_err(dev, "%s: failed to add list-dev for cpu%d device\n",
+		opp_dev = _add_opp_dev(dev, opp_table);
+		if (!opp_dev) {
+			dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n",
 				__func__, cpu);
 			continue;
 		}
 	}
 unlock:
-	mutex_unlock(&dev_opp_list_lock);
+	mutex_unlock(&opp_table_lock);
 
 	return ret;
 }
diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c
index ddfe4773e922..ef1ae6b52042 100644
--- a/drivers/base/power/opp/debugfs.c
+++ b/drivers/base/power/opp/debugfs.c
@@ -34,9 +34,9 @@ void opp_debug_remove_one(struct dev_pm_opp *opp)
 	debugfs_remove_recursive(opp->dentry);
 }
 
-int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
+int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
 {
-	struct dentry *pdentry = dev_opp->dentry;
+	struct dentry *pdentry = opp_table->dentry;
 	struct dentry *d;
 	char name[25];	/* 20 chars for 64 bit value + 5 (opp:\0) */
 
@@ -83,52 +83,52 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
 	return 0;
 }
 
-static int device_opp_debug_create_dir(struct device_list_opp *list_dev,
-				       struct device_opp *dev_opp)
+static int opp_list_debug_create_dir(struct opp_device *opp_dev,
+				     struct opp_table *opp_table)
 {
-	const struct device *dev = list_dev->dev;
+	const struct device *dev = opp_dev->dev;
 	struct dentry *d;
 
-	opp_set_dev_name(dev, dev_opp->dentry_name);
+	opp_set_dev_name(dev, opp_table->dentry_name);
 
 	/* Create device specific directory */
-	d = debugfs_create_dir(dev_opp->dentry_name, rootdir);
+	d = debugfs_create_dir(opp_table->dentry_name, rootdir);
 	if (!d) {
 		dev_err(dev, "%s: Failed to create debugfs dir\n", __func__);
 		return -ENOMEM;
 	}
 
-	list_dev->dentry = d;
-	dev_opp->dentry = d;
+	opp_dev->dentry = d;
+	opp_table->dentry = d;
 
 	return 0;
 }
 
-static int device_opp_debug_create_link(struct device_list_opp *list_dev,
-					struct device_opp *dev_opp)
+static int opp_list_debug_create_link(struct opp_device *opp_dev,
+				      struct opp_table *opp_table)
 {
-	const struct device *dev = list_dev->dev;
+	const struct device *dev = opp_dev->dev;
 	char name[NAME_MAX];
 	struct dentry *d;
 
-	opp_set_dev_name(list_dev->dev, name);
+	opp_set_dev_name(opp_dev->dev, name);
 
 	/* Create device specific directory link */
-	d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name);
+	d = debugfs_create_symlink(name, rootdir, opp_table->dentry_name);
 	if (!d) {
 		dev_err(dev, "%s: Failed to create link\n", __func__);
 		return -ENOMEM;
 	}
 
-	list_dev->dentry = d;
+	opp_dev->dentry = d;
 
 	return 0;
 }
 
 /**
  * opp_debug_register - add a device opp node to the debugfs 'opp' directory
- * @list_dev: list-dev pointer for device
- * @dev_opp: the device-opp being added
+ * @opp_dev: opp-dev pointer for device
+ * @opp_table: the device-opp being added
  *
  * Dynamically adds device specific directory in debugfs 'opp' directory. If the
  * device-opp is shared with other devices, then links will be created for all
@@ -136,73 +136,72 @@ static int device_opp_debug_create_link(struct device_list_opp *list_dev,
  *
  * Return: 0 on success, otherwise negative error.
  */
-int opp_debug_register(struct device_list_opp *list_dev,
-		       struct device_opp *dev_opp)
+int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table)
 {
 	if (!rootdir) {
 		pr_debug("%s: Uninitialized rootdir\n", __func__);
 		return -EINVAL;
 	}
 
-	if (dev_opp->dentry)
-		return device_opp_debug_create_link(list_dev, dev_opp);
+	if (opp_table->dentry)
+		return opp_list_debug_create_link(opp_dev, opp_table);
 
-	return device_opp_debug_create_dir(list_dev, dev_opp);
+	return opp_list_debug_create_dir(opp_dev, opp_table);
 }
 
-static void opp_migrate_dentry(struct device_list_opp *list_dev,
-			       struct device_opp *dev_opp)
+static void opp_migrate_dentry(struct opp_device *opp_dev,
+			       struct opp_table *opp_table)
 {
-	struct device_list_opp *new_dev;
+	struct opp_device *new_dev;
 	const struct device *dev;
 	struct dentry *dentry;
 
-	/* Look for next list-dev */
-	list_for_each_entry(new_dev, &dev_opp->dev_list, node)
-		if (new_dev != list_dev)
+	/* Look for next opp-dev */
+	list_for_each_entry(new_dev, &opp_table->dev_list, node)
+		if (new_dev != opp_dev)
 			break;
 
 	/* new_dev is guaranteed to be valid here */
 	dev = new_dev->dev;
 	debugfs_remove_recursive(new_dev->dentry);
 
-	opp_set_dev_name(dev, dev_opp->dentry_name);
+	opp_set_dev_name(dev, opp_table->dentry_name);
 
-	dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir,
-				dev_opp->dentry_name);
+	dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir,
+				opp_table->dentry_name);
 	if (!dentry) {
 		dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
-			__func__, dev_name(list_dev->dev), dev_name(dev));
+			__func__, dev_name(opp_dev->dev), dev_name(dev));
 		return;
 	}
 
 	new_dev->dentry = dentry;
-	dev_opp->dentry = dentry;
+	opp_table->dentry = dentry;
 }
 
 /**
  * opp_debug_unregister - remove a device opp node from debugfs opp directory
- * @list_dev: list-dev pointer for device
- * @dev_opp: the device-opp being removed
+ * @opp_dev: opp-dev pointer for device
+ * @opp_table: the device-opp being removed
  *
  * Dynamically removes device specific directory from debugfs 'opp' directory.
  */
-void opp_debug_unregister(struct device_list_opp *list_dev,
-			  struct device_opp *dev_opp)
+void opp_debug_unregister(struct opp_device *opp_dev,
+			  struct opp_table *opp_table)
 {
-	if (list_dev->dentry == dev_opp->dentry) {
+	if (opp_dev->dentry == opp_table->dentry) {
 		/* Move the real dentry object under another device */
-		if (!list_is_singular(&dev_opp->dev_list)) {
-			opp_migrate_dentry(list_dev, dev_opp);
+		if (!list_is_singular(&opp_table->dev_list)) {
+			opp_migrate_dentry(opp_dev, opp_table);
 			goto out;
 		}
-		dev_opp->dentry = NULL;
+		opp_table->dentry = NULL;
 	}
 
-	debugfs_remove_recursive(list_dev->dentry);
+	debugfs_remove_recursive(opp_dev->dentry);
 
 out:
-	list_dev->dentry = NULL;
+	opp_dev->dentry = NULL;
 }
 
 static int __init opp_debug_init(void)
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 4f1bdfc7da03..f67f806fcf3a 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -26,12 +26,12 @@ struct clk;
 struct regulator;
 
 /* Lock to allow exclusive modification to the device and opp lists */
-extern struct mutex dev_opp_list_lock;
+extern struct mutex opp_table_lock;
 
 /*
  * Internal data structure organization with the OPP layer library is as
  * follows:
- * dev_opp_list (root)
+ * opp_tables (root)
  *	|- device 1 (represents voltage domain 1)
  *	|	|- opp 1 (availability, freq, voltage)
  *	|	|- opp 2 ..
@@ -40,18 +40,18 @@ extern struct mutex dev_opp_list_lock;
  *	|- device 2 (represents the next voltage domain)
  *	...
  *	`- device m (represents mth voltage domain)
- * device 1, 2.. are represented by dev_opp structure while each opp
+ * device 1, 2.. are represented by opp_table structure while each opp
  * is represented by the opp structure.
  */
 
 /**
  * struct dev_pm_opp - Generic OPP description structure
- * @node:	opp list node. The nodes are maintained throughout the lifetime
+ * @node:	opp table node. The nodes are maintained throughout the lifetime
  *		of boot. It is expected only an optimal set of OPPs are
  *		added to the library by the SoC framework.
- *		RCU usage: opp list is traversed with RCU locks. node
+ *		RCU usage: opp table is traversed with RCU locks. node
  *		modification is possible realtime, hence the modifications
- *		are protected by the dev_opp_list_lock for integrity.
+ *		are protected by the opp_table_lock for integrity.
  *		IMPORTANT: the opp nodes should be maintained in increasing
  *		order.
  * @available:	true/false - marks if this OPP as available or not
@@ -65,7 +65,7 @@ extern struct mutex dev_opp_list_lock;
  * @u_amp:	Maximum current drawn by the device in microamperes
  * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
  *		frequency from any other OPP's frequency.
- * @dev_opp:	points back to the device_opp struct this opp belongs to
+ * @opp_table:	points back to the opp_table struct this opp belongs to
  * @rcu_head:	RCU callback head used for deferred freeing
  * @np:		OPP's device node.
  * @dentry:	debugfs dentry pointer (per opp)
@@ -87,7 +87,7 @@ struct dev_pm_opp {
 	unsigned long u_amp;
 	unsigned long clock_latency_ns;
 
-	struct device_opp *dev_opp;
+	struct opp_table *opp_table;
 	struct rcu_head rcu_head;
 
 	struct device_node *np;
@@ -98,16 +98,16 @@ struct dev_pm_opp {
 };
 
 /**
- * struct device_list_opp - devices managed by 'struct device_opp'
+ * struct opp_device - devices managed by 'struct opp_table'
  * @node:	list node
  * @dev:	device to which the struct object belongs
  * @rcu_head:	RCU callback head used for deferred freeing
  * @dentry:	debugfs dentry pointer (per device)
  *
- * This is an internal data structure maintaining the list of devices that are
- * managed by 'struct device_opp'.
+ * This is an internal data structure maintaining the devices that are managed
+ * by 'struct opp_table'.
  */
-struct device_list_opp {
+struct opp_device {
 	struct list_head node;
 	const struct device *dev;
 	struct rcu_head rcu_head;
@@ -118,16 +118,16 @@ struct device_list_opp {
 };
 
 /**
- * struct device_opp - Device opp structure
- * @node:	list node - contains the devices with OPPs that
+ * struct opp_table - Device opp structure
+ * @node:	table node - contains the devices with OPPs that
  *		have been registered. Nodes once added are not modified in this
- *		list.
- *		RCU usage: nodes are not modified in the list of device_opp,
- *		however addition is possible and is secured by dev_opp_list_lock
+ *		table.
+ *		RCU usage: nodes are not modified in the table of opp_table,
+ *		however addition is possible and is secured by opp_table_lock
  * @srcu_head:	notifier head to notify the OPP availability changes.
  * @rcu_head:	RCU callback head used for deferred freeing
  * @dev_list:	list of devices that share these OPPs
- * @opp_list:	list of opps
+ * @opp_list:	table of opps
  * @np:		struct device_node pointer for opp's DT node.
  * @clock_latency_ns_max: Max clock latency in nanoseconds.
  * @shared_opp: OPP is shared between multiple devices.
@@ -150,7 +150,7 @@ struct device_list_opp {
  * need to wait for the grace period of both of them before freeing any
  * resources. And so we have used kfree_rcu() from within call_srcu() handlers.
  */
-struct device_opp {
+struct opp_table {
 	struct list_head node;
 
 	struct srcu_notifier_head srcu_head;
@@ -180,30 +180,27 @@ struct device_opp {
 };
 
 /* Routines internal to opp core */
-struct device_opp *_find_device_opp(struct device *dev);
-struct device_list_opp *_add_list_dev(const struct device *dev,
-				      struct device_opp *dev_opp);
+struct opp_table *_find_opp_table(struct device *dev);
+struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
 struct device_node *_of_get_opp_desc_node(struct device *dev);
 
 #ifdef CONFIG_DEBUG_FS
 void opp_debug_remove_one(struct dev_pm_opp *opp);
-int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp);
-int opp_debug_register(struct device_list_opp *list_dev,
-		       struct device_opp *dev_opp);
-void opp_debug_unregister(struct device_list_opp *list_dev,
-			  struct device_opp *dev_opp);
+int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table);
+int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table);
+void opp_debug_unregister(struct opp_device *opp_dev, struct opp_table *opp_table);
 #else
 static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {}
 
 static inline int opp_debug_create_one(struct dev_pm_opp *opp,
-				       struct device_opp *dev_opp)
+				       struct opp_table *opp_table)
 { return 0; }
-static inline int opp_debug_register(struct device_list_opp *list_dev,
-				     struct device_opp *dev_opp)
+static inline int opp_debug_register(struct opp_device *opp_dev,
+				     struct opp_table *opp_table)
 { return 0; }
 
-static inline void opp_debug_unregister(struct device_list_opp *list_dev,
-					struct device_opp *dev_opp)
+static inline void opp_debug_unregister(struct opp_device *opp_dev,
+					struct opp_table *opp_table)
 { }
 #endif		/* DEBUG_FS */
 

From 5b1a8eb56e49d9bbf9075551804432a7b3b635a7 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Tue, 17 Nov 2015 12:06:22 +0000
Subject: [PATCH 089/797] cpufreq-dt: Supply power coefficient when registering
 cooling devices

Support registering cooling devices with a dynamic power coefficient
where one is provided by the device tree. This allows the OF-registered
cooling device driver to be used with the power_allocator thermal governor.

Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit f8fa8ae06b8c2c25d81c99766f9226adc5c3e073)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 68232fef54c9..fc61cab00131 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -406,8 +406,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
 	 * thermal DT code takes care of matching them.
 	 */
 	if (of_find_property(np, "#cooling-cells", NULL)) {
-		priv->cdev = of_cpufreq_cooling_register(np,
-							 policy->related_cpus);
+		u32 power_coefficient = 0;
+
+		of_property_read_u32(np, "dynamic-power-coefficient",
+				     &power_coefficient);
+
+		priv->cdev = of_cpufreq_power_cooling_register(np,
+				policy->related_cpus, power_coefficient, NULL);
 		if (IS_ERR(priv->cdev)) {
 			dev_err(priv->cpu_dev,
 				"running cpufreq without cooling device: %ld\n",

From 2c7296a879c7957ef8bfda8b21c3da432eeedf5d Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Wed, 30 Dec 2015 12:18:42 +0100
Subject: [PATCH 090/797] cpufreq-dt: fix handling regulator_get_voltage()
 result

The function can return negative values so it should be assigned
to signed type.

The problem has been detected using proposed semantic patch
scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci.

Link: http://permalink.gmane.org/gmane.linux.kernel/2038576
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 929ca89c305a6ed7a4149115be99af6d73c36918)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index fc61cab00131..0ca74d070058 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -50,7 +50,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
 	struct private_data *priv = policy->driver_data;
 	struct device *cpu_dev = priv->cpu_dev;
 	struct regulator *cpu_reg = priv->cpu_reg;
-	unsigned long volt = 0, volt_old = 0, tol = 0;
+	unsigned long volt = 0, tol = 0;
+	int volt_old = 0;
 	unsigned int old_freq, new_freq;
 	long freq_Hz, freq_exact;
 	int ret;
@@ -83,7 +84,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
 			opp_freq / 1000, volt);
 	}
 
-	dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n",
+	dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n",
 		old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1,
 		new_freq / 1000, volt ? volt / 1000 : -1);
 

From abd485677e17532c9edfa11ec1f46bf2f2dff037 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:40 +0530
Subject: [PATCH 091/797] cpufreq: dt: Convert few pr_debug/err() calls to
 dev_dbg/err()

We have the device structure available now, let's use it for better print
messages.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 896d6a4c0f41a93809b83f9e58aad73874a89d99)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 0ca74d070058..ace0168274d4 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -246,7 +246,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	ret = dev_pm_opp_get_opp_count(cpu_dev);
 	if (ret <= 0) {
-		pr_debug("OPP table is not ready, deferring probe\n");
+		dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
 		ret = -EPROBE_DEFER;
 		goto out_free_opp;
 	}
@@ -325,7 +325,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
-		pr_err("failed to init cpufreq table: %d\n", ret);
+		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
 		goto out_free_priv;
 	}
 

From c56192201536f960a81bbf845c4667a9c79a2439 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:41 +0530
Subject: [PATCH 092/797] cpufreq: dt: Rename 'need_update' to 'opp_v1'

That's the real purpose of this field, i.e. to take special care of old
OPP V1 bindings. Let's name it accordingly, so that it can be used
elsewhere.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 457e99e60a8f5a40b7da204c0bfc8a86ad2161b9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index ace0168274d4..0047d20803db 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -199,7 +199,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	struct dev_pm_opp *suspend_opp;
 	unsigned long min_uV = ~0, max_uV = 0;
 	unsigned int transition_latency;
-	bool need_update = false;
+	bool opp_v1 = false;
 	int ret;
 
 	ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk);
@@ -223,7 +223,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		 * finding shared-OPPs for backward compatibility.
 		 */
 		if (ret == -ENOENT)
-			need_update = true;
+			opp_v1 = true;
 		else
 			goto out_node_put;
 	}
@@ -251,7 +251,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		goto out_free_opp;
 	}
 
-	if (need_update) {
+	if (opp_v1) {
 		struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
 
 		if (!pd || !pd->independent_clocks)

From c9be30497a25decc31658f82603c4647c59b5e08 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:42 +0530
Subject: [PATCH 093/797] cpufreq: dt: OPP layers handles clock-latency for V1
 bindings as well

"clock-latency" is handled by OPP layer for all bindings and so there is
no need to make special calls for V1 bindings. Use
dev_pm_opp_get_max_clock_latency() for both the cases.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 391d9aef8145204e0a5d67be3bd1fc45c5396dae)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 0047d20803db..4c9f8a828f6f 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -265,10 +265,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		if (ret)
 			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
 				__func__, ret);
-
-		of_property_read_u32(np, "clock-latency", &transition_latency);
-	} else {
-		transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
 	}
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -279,6 +275,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 
 	of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
 
+	transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
 	if (!transition_latency)
 		transition_latency = CPUFREQ_ETERNAL;
 

From 99753a20c8e9cbdfcca7431a0026506c01a0bbe1 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:43 +0530
Subject: [PATCH 094/797] cpufreq: dt: Pass regulator name to the OPP core

OPP core can handle the regulators by itself, but it needs to know
the name of the regulator to fetch. Add support for that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 050794aaebbb9f2c2c50b340b6998273e7c64189)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 46 ++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 4c9f8a828f6f..2af75f8088bb 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -34,6 +34,7 @@ struct private_data {
 	struct regulator *cpu_reg;
 	struct thermal_cooling_device *cdev;
 	unsigned int voltage_tolerance; /* in percentage */
+	const char *reg_name;
 };
 
 static struct freq_attr *cpufreq_dt_attr[] = {
@@ -119,6 +120,30 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
 	return ret;
 }
 
+/*
+ * An earlier version of opp-v1 bindings used to name the regulator
+ * "cpu0-supply", we still need to handle that for backwards compatibility.
+ */
+static const char *find_supply_name(struct device *dev, struct device_node *np)
+{
+	struct property *pp;
+	int cpu = dev->id;
+
+	/* Try "cpu0" for older DTs */
+	if (!cpu) {
+		pp = of_find_property(np, "cpu0-supply", NULL);
+		if (pp)
+			return "cpu0";
+	}
+
+	pp = of_find_property(np, "cpu-supply", NULL);
+	if (pp)
+		return "cpu";
+
+	dev_dbg(dev, "no regulator for cpu%d\n", cpu);
+	return NULL;
+}
+
 static int allocate_resources(int cpu, struct device **cdev,
 			      struct regulator **creg, struct clk **cclk)
 {
@@ -200,6 +225,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	unsigned long min_uV = ~0, max_uV = 0;
 	unsigned int transition_latency;
 	bool opp_v1 = false;
+	const char *name;
 	int ret;
 
 	ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk);
@@ -228,6 +254,20 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 			goto out_node_put;
 	}
 
+	/*
+	 * OPP layer will be taking care of regulators now, but it needs to know
+	 * the name of the regulator first.
+	 */
+	name = find_supply_name(cpu_dev, np);
+	if (name) {
+		ret = dev_pm_opp_set_regulator(cpu_dev, name);
+		if (ret) {
+			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
+				policy->cpu, ret);
+			goto out_node_put;
+		}
+	}
+
 	/*
 	 * Initialize OPP tables for all policy->cpus. They will be shared by
 	 * all CPUs which have marked their CPUs shared with OPP bindings.
@@ -273,6 +313,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		goto out_free_opp;
 	}
 
+	priv->reg_name = name;
 	of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
 
 	transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
@@ -366,6 +407,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	kfree(priv);
 out_free_opp:
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
+	if (name)
+		dev_pm_opp_put_regulator(cpu_dev);
 out_node_put:
 	of_node_put(np);
 out_put_reg_clk:
@@ -383,6 +426,9 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 	cpufreq_cooling_unregister(priv->cdev);
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+	if (priv->reg_name)
+		dev_pm_opp_put_regulator(priv->cpu_dev);
+
 	clk_put(policy->clk);
 	if (!IS_ERR(priv->cpu_reg))
 		regulator_put(priv->cpu_reg);

From 70969a89d8212f3d77a4c67712fbbd22c465d50e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:44 +0530
Subject: [PATCH 095/797] cpufreq: dt: Unsupported OPPs are already disabled

The core already has a valid regulator set for the device opp and the
unsupported OPPs are already disabled by the core. There is no need to
repeat that in the user drivers, get rid of it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 6def6ea75e6dea45f01a16ae3cfb5b5ce48dd5e9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 2af75f8088bb..c3fe89461ff4 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -349,8 +349,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 					min_uV = opp_uV;
 				if (opp_uV > max_uV)
 					max_uV = opp_uV;
-			} else {
-				dev_pm_opp_disable(cpu_dev, opp_freq);
 			}
 
 			opp_freq++;

From ba7b484ca022afe4fc4b64b3f4ed359a57a93831 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:45 +0530
Subject: [PATCH 096/797] cpufreq: dt: Reuse
 dev_pm_opp_get_max_transition_latency()

OPP layer has all the information now to calculate transition latency
(clock_latency + voltage_latency). Let's reuse the OPP layer helper
dev_pm_opp_get_max_transition_latency() instead of open coding the same
in cpufreq-dt driver.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 755b888ff098c9f762717a9fbda7e05b16619069)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 48 +++---------------------------------
 1 file changed, 4 insertions(+), 44 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index c3fe89461ff4..6f80ce56b4ec 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -222,7 +222,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	struct regulator *cpu_reg;
 	struct clk *cpu_clk;
 	struct dev_pm_opp *suspend_opp;
-	unsigned long min_uV = ~0, max_uV = 0;
 	unsigned int transition_latency;
 	bool opp_v1 = false;
 	const char *name;
@@ -316,49 +315,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	priv->reg_name = name;
 	of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
 
-	transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
-	if (!transition_latency)
-		transition_latency = CPUFREQ_ETERNAL;
-
-	if (!IS_ERR(cpu_reg)) {
-		unsigned long opp_freq = 0;
-
-		/*
-		 * Disable any OPPs where the connected regulator isn't able to
-		 * provide the specified voltage and record minimum and maximum
-		 * voltage levels.
-		 */
-		while (1) {
-			struct dev_pm_opp *opp;
-			unsigned long opp_uV, tol_uV;
-
-			rcu_read_lock();
-			opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq);
-			if (IS_ERR(opp)) {
-				rcu_read_unlock();
-				break;
-			}
-			opp_uV = dev_pm_opp_get_voltage(opp);
-			rcu_read_unlock();
-
-			tol_uV = opp_uV * priv->voltage_tolerance / 100;
-			if (regulator_is_supported_voltage(cpu_reg,
-							   opp_uV - tol_uV,
-							   opp_uV + tol_uV)) {
-				if (opp_uV < min_uV)
-					min_uV = opp_uV;
-				if (opp_uV > max_uV)
-					max_uV = opp_uV;
-			}
-
-			opp_freq++;
-		}
-
-		ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV);
-		if (ret > 0)
-			transition_latency += ret * 1000;
-	}
-
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
@@ -393,6 +349,10 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
 	}
 
+	transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
+	if (!transition_latency)
+		transition_latency = CPUFREQ_ETERNAL;
+
 	policy->cpuinfo.transition_latency = transition_latency;
 
 	of_node_put(np);

From 5da10c86b41732e6bc4444cf037494d3968baa1a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:46 +0530
Subject: [PATCH 097/797] cpufreq: dt: Use dev_pm_opp_set_rate() to switch
 frequency

OPP core supports frequency/voltage changes based on the target
frequency now, use that instead of open coding the same in cpufreq-dt
driver.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 78c3ba5df96c875b1668e1cd3ee0a69e62454f32)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 73 +-----------------------------------
 1 file changed, 2 insertions(+), 71 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 6f80ce56b4ec..150a172c7d0a 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -45,79 +45,10 @@ static struct freq_attr *cpufreq_dt_attr[] = {
 
 static int set_target(struct cpufreq_policy *policy, unsigned int index)
 {
-	struct dev_pm_opp *opp;
-	struct cpufreq_frequency_table *freq_table = policy->freq_table;
-	struct clk *cpu_clk = policy->clk;
 	struct private_data *priv = policy->driver_data;
-	struct device *cpu_dev = priv->cpu_dev;
-	struct regulator *cpu_reg = priv->cpu_reg;
-	unsigned long volt = 0, tol = 0;
-	int volt_old = 0;
-	unsigned int old_freq, new_freq;
-	long freq_Hz, freq_exact;
-	int ret;
 
-	freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
-	if (freq_Hz <= 0)
-		freq_Hz = freq_table[index].frequency * 1000;
-
-	freq_exact = freq_Hz;
-	new_freq = freq_Hz / 1000;
-	old_freq = clk_get_rate(cpu_clk) / 1000;
-
-	if (!IS_ERR(cpu_reg)) {
-		unsigned long opp_freq;
-
-		rcu_read_lock();
-		opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz);
-		if (IS_ERR(opp)) {
-			rcu_read_unlock();
-			dev_err(cpu_dev, "failed to find OPP for %ld\n",
-				freq_Hz);
-			return PTR_ERR(opp);
-		}
-		volt = dev_pm_opp_get_voltage(opp);
-		opp_freq = dev_pm_opp_get_freq(opp);
-		rcu_read_unlock();
-		tol = volt * priv->voltage_tolerance / 100;
-		volt_old = regulator_get_voltage(cpu_reg);
-		dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n",
-			opp_freq / 1000, volt);
-	}
-
-	dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n",
-		old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1,
-		new_freq / 1000, volt ? volt / 1000 : -1);
-
-	/* scaling up?  scale voltage before frequency */
-	if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
-		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
-		if (ret) {
-			dev_err(cpu_dev, "failed to scale voltage up: %d\n",
-				ret);
-			return ret;
-		}
-	}
-
-	ret = clk_set_rate(cpu_clk, freq_exact);
-	if (ret) {
-		dev_err(cpu_dev, "failed to set clock rate: %d\n", ret);
-		if (!IS_ERR(cpu_reg) && volt_old > 0)
-			regulator_set_voltage_tol(cpu_reg, volt_old, tol);
-		return ret;
-	}
-
-	/* scaling down?  scale voltage after frequency */
-	if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
-		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
-		if (ret) {
-			dev_err(cpu_dev, "failed to scale voltage down: %d\n",
-				ret);
-			clk_set_rate(cpu_clk, old_freq * 1000);
-		}
-	}
-
-	return ret;
+	return dev_pm_opp_set_rate(priv->cpu_dev,
+				   policy->freq_table[index].frequency * 1000);
 }
 
 /*

From 4b2c9f2ab35e19c8de1a4e50d798a49d98e5c39d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:47 +0530
Subject: [PATCH 098/797] cpufreq: dt: No need to fetch voltage-tolerance

It's already done by the core and we don't need to get it anymore. And
so, we don't need to get the of_node in cpufreq_init() anymore; move
that to find_supply_name() instead.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit df2c8ec28e73d47392b8cb24828c15c54819da41)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 46 ++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 150a172c7d0a..bbafd7b63d1a 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -33,7 +33,6 @@ struct private_data {
 	struct device *cpu_dev;
 	struct regulator *cpu_reg;
 	struct thermal_cooling_device *cdev;
-	unsigned int voltage_tolerance; /* in percentage */
 	const char *reg_name;
 };
 
@@ -55,24 +54,38 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
  * An earlier version of opp-v1 bindings used to name the regulator
  * "cpu0-supply", we still need to handle that for backwards compatibility.
  */
-static const char *find_supply_name(struct device *dev, struct device_node *np)
+static const char *find_supply_name(struct device *dev)
 {
+	struct device_node *np;
 	struct property *pp;
 	int cpu = dev->id;
+	const char *name = NULL;
+
+	np = of_node_get(dev->of_node);
+
+	/* This must be valid for sure */
+	if (WARN_ON(!np))
+		return NULL;
 
 	/* Try "cpu0" for older DTs */
 	if (!cpu) {
 		pp = of_find_property(np, "cpu0-supply", NULL);
-		if (pp)
-			return "cpu0";
+		if (pp) {
+			name = "cpu0";
+			goto node_put;
+		}
 	}
 
 	pp = of_find_property(np, "cpu-supply", NULL);
-	if (pp)
-		return "cpu";
+	if (pp) {
+		name = "cpu";
+		goto node_put;
+	}
 
 	dev_dbg(dev, "no regulator for cpu%d\n", cpu);
-	return NULL;
+node_put:
+	of_node_put(np);
+	return name;
 }
 
 static int allocate_resources(int cpu, struct device **cdev,
@@ -147,7 +160,6 @@ static int allocate_resources(int cpu, struct device **cdev,
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct cpufreq_frequency_table *freq_table;
-	struct device_node *np;
 	struct private_data *priv;
 	struct device *cpu_dev;
 	struct regulator *cpu_reg;
@@ -164,13 +176,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		return ret;
 	}
 
-	np = of_node_get(cpu_dev->of_node);
-	if (!np) {
-		dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu);
-		ret = -ENOENT;
-		goto out_put_reg_clk;
-	}
-
 	/* Get OPP-sharing information from "operating-points-v2" bindings */
 	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus);
 	if (ret) {
@@ -181,20 +186,20 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		if (ret == -ENOENT)
 			opp_v1 = true;
 		else
-			goto out_node_put;
+			goto out_put_reg_clk;
 	}
 
 	/*
 	 * OPP layer will be taking care of regulators now, but it needs to know
 	 * the name of the regulator first.
 	 */
-	name = find_supply_name(cpu_dev, np);
+	name = find_supply_name(cpu_dev);
 	if (name) {
 		ret = dev_pm_opp_set_regulator(cpu_dev, name);
 		if (ret) {
 			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
 				policy->cpu, ret);
-			goto out_node_put;
+			goto out_put_reg_clk;
 		}
 	}
 
@@ -244,7 +249,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	}
 
 	priv->reg_name = name;
-	of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
@@ -286,8 +290,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 
 	policy->cpuinfo.transition_latency = transition_latency;
 
-	of_node_put(np);
-
 	return 0;
 
 out_free_cpufreq_table:
@@ -298,8 +300,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
 	if (name)
 		dev_pm_opp_put_regulator(cpu_dev);
-out_node_put:
-	of_node_put(np);
 out_put_reg_clk:
 	clk_put(cpu_clk);
 	if (!IS_ERR(cpu_reg))

From 454f12fd0b3fae75d101765614e749e3f70a5078 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 9 Feb 2016 10:30:48 +0530
Subject: [PATCH 099/797] cpufreq: dt: No need to allocate resources anymore

OPP layer manages it now and cpufreq-dt driver doesn't need it. But, we
still need to check for availability of resources for deferred probing.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit dd02a3d920083b6cb0ee4f0eaf2c599b740bf5fe)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 116 ++++++++++++++---------------------
 1 file changed, 47 insertions(+), 69 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index bbafd7b63d1a..f951f911786e 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -31,7 +31,6 @@
 
 struct private_data {
 	struct device *cpu_dev;
-	struct regulator *cpu_reg;
 	struct thermal_cooling_device *cdev;
 	const char *reg_name;
 };
@@ -88,73 +87,59 @@ static const char *find_supply_name(struct device *dev)
 	return name;
 }
 
-static int allocate_resources(int cpu, struct device **cdev,
-			      struct regulator **creg, struct clk **cclk)
+static int resources_available(void)
 {
 	struct device *cpu_dev;
 	struct regulator *cpu_reg;
 	struct clk *cpu_clk;
 	int ret = 0;
-	char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg;
+	const char *name;
 
-	cpu_dev = get_cpu_device(cpu);
+	cpu_dev = get_cpu_device(0);
 	if (!cpu_dev) {
-		pr_err("failed to get cpu%d device\n", cpu);
+		pr_err("failed to get cpu0 device\n");
 		return -ENODEV;
 	}
 
-	/* Try "cpu0" for older DTs */
-	if (!cpu)
-		reg = reg_cpu0;
-	else
-		reg = reg_cpu;
+	cpu_clk = clk_get(cpu_dev, NULL);
+	ret = PTR_ERR_OR_ZERO(cpu_clk);
+	if (ret) {
+		/*
+		 * If cpu's clk node is present, but clock is not yet
+		 * registered, we should try defering probe.
+		 */
+		if (ret == -EPROBE_DEFER)
+			dev_dbg(cpu_dev, "clock not ready, retry\n");
+		else
+			dev_err(cpu_dev, "failed to get clock: %d\n", ret);
 
-try_again:
-	cpu_reg = regulator_get_optional(cpu_dev, reg);
+		return ret;
+	}
+
+	clk_put(cpu_clk);
+
+	name = find_supply_name(cpu_dev);
+	/* Platform doesn't require regulator */
+	if (!name)
+		return 0;
+
+	cpu_reg = regulator_get_optional(cpu_dev, name);
 	ret = PTR_ERR_OR_ZERO(cpu_reg);
 	if (ret) {
 		/*
 		 * If cpu's regulator supply node is present, but regulator is
 		 * not yet registered, we should try defering probe.
 		 */
-		if (ret == -EPROBE_DEFER) {
-			dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
-				cpu);
-			return ret;
-		}
-
-		/* Try with "cpu-supply" */
-		if (reg == reg_cpu0) {
-			reg = reg_cpu;
-			goto try_again;
-		}
-
-		dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
-	}
-
-	cpu_clk = clk_get(cpu_dev, NULL);
-	ret = PTR_ERR_OR_ZERO(cpu_clk);
-	if (ret) {
-		/* put regulator */
-		if (!IS_ERR(cpu_reg))
-			regulator_put(cpu_reg);
-
-		/*
-		 * If cpu's clk node is present, but clock is not yet
-		 * registered, we should try defering probe.
-		 */
 		if (ret == -EPROBE_DEFER)
-			dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu);
+			dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n");
 		else
-			dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu,
-				ret);
-	} else {
-		*cdev = cpu_dev;
-		*creg = cpu_reg;
-		*cclk = cpu_clk;
+			dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret);
+
+		return ret;
 	}
 
-	return ret;
+	regulator_put(cpu_reg);
+	return 0;
 }
 
 static int cpufreq_init(struct cpufreq_policy *policy)
@@ -162,7 +147,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	struct cpufreq_frequency_table *freq_table;
 	struct private_data *priv;
 	struct device *cpu_dev;
-	struct regulator *cpu_reg;
 	struct clk *cpu_clk;
 	struct dev_pm_opp *suspend_opp;
 	unsigned int transition_latency;
@@ -170,9 +154,16 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	const char *name;
 	int ret;
 
-	ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk);
-	if (ret) {
-		pr_err("%s: Failed to allocate resources: %d\n", __func__, ret);
+	cpu_dev = get_cpu_device(policy->cpu);
+	if (!cpu_dev) {
+		pr_err("failed to get cpu%d device\n", policy->cpu);
+		return -ENODEV;
+	}
+
+	cpu_clk = clk_get(cpu_dev, NULL);
+	if (IS_ERR(cpu_clk)) {
+		ret = PTR_ERR(cpu_clk);
+		dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret);
 		return ret;
 	}
 
@@ -186,7 +177,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		if (ret == -ENOENT)
 			opp_v1 = true;
 		else
-			goto out_put_reg_clk;
+			goto out_put_clk;
 	}
 
 	/*
@@ -199,7 +190,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		if (ret) {
 			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
 				policy->cpu, ret);
-			goto out_put_reg_clk;
+			goto out_put_clk;
 		}
 	}
 
@@ -257,9 +248,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	}
 
 	priv->cpu_dev = cpu_dev;
-	priv->cpu_reg = cpu_reg;
 	policy->driver_data = priv;
-
 	policy->clk = cpu_clk;
 
 	rcu_read_lock();
@@ -300,10 +289,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
 	if (name)
 		dev_pm_opp_put_regulator(cpu_dev);
-out_put_reg_clk:
+out_put_clk:
 	clk_put(cpu_clk);
-	if (!IS_ERR(cpu_reg))
-		regulator_put(cpu_reg);
 
 	return ret;
 }
@@ -319,8 +306,6 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 		dev_pm_opp_put_regulator(priv->cpu_dev);
 
 	clk_put(policy->clk);
-	if (!IS_ERR(priv->cpu_reg))
-		regulator_put(priv->cpu_reg);
 	kfree(priv);
 
 	return 0;
@@ -373,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = {
 
 static int dt_cpufreq_probe(struct platform_device *pdev)
 {
-	struct device *cpu_dev;
-	struct regulator *cpu_reg;
-	struct clk *cpu_clk;
 	int ret;
 
 	/*
@@ -385,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
 	 *
 	 * FIXME: Is checking this only for CPU0 sufficient ?
 	 */
-	ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk);
+	ret = resources_available();
 	if (ret)
 		return ret;
 
-	clk_put(cpu_clk);
-	if (!IS_ERR(cpu_reg))
-		regulator_put(cpu_reg);
-
 	dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
 
 	ret = cpufreq_register_driver(&dt_cpufreq_driver);
 	if (ret)
-		dev_err(cpu_dev, "failed register driver: %d\n", ret);
+		dev_err(&pdev->dev, "failed register driver: %d\n", ret);
 
 	return ret;
 }

From 84cf91ad330f5c9ba51f761a37e58b9aa99bf470 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 29 Feb 2016 15:59:18 +0100
Subject: [PATCH 100/797] s390/cpumf: Fix lpp detection

commit 7a76aa95f6f6682db5629449d763251d1c9f8c4e upstream.

We have to check bit 40 of the facility list before issuing LPP
and not bit 48. Otherwise a guest running on a system with the
"decimal-floating-point zoned-conversion facility" and without
the "set-program-parameters facility" might crash on an lpp
instruction.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: e22cf8ca6f75 ("s390/cpumf: rework program parameter setting to detect guest samples")
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/head64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 58b719fa8067..1ad2407c7f75 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,7 @@
 
 __HEAD
 ENTRY(startup_continue)
-	tm	__LC_STFL_FAC_LIST+6,0x80	# LPP available ?
+	tm	__LC_STFL_FAC_LIST+5,0x80	# LPP available ?
 	jz	0f
 	xc	__LC_LPP+1(7,0),__LC_LPP+1	# clear lpp and current_pid
 	mvi	__LC_LPP,0x80			#   and set LPP_MAGIC

From 4c8fe4f52755d4690a745f4e56b543c51add86fe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 20 Nov 2015 15:24:39 +0100
Subject: [PATCH 101/797] regulator: core: avoid unused variable warning

commit fa731ac7ea04a7d3a5c6d2f568132478c02a83b3 upstream.

The second argument of the mutex_lock_nested() helper is only
evaluated if CONFIG_DEBUG_LOCK_ALLOC is set. Otherwise we
get this build warning for the new regulator_lock_supply
function:

drivers/regulator/core.c: In function 'regulator_lock_supply':
drivers/regulator/core.c:142:6: warning: unused variable 'i' [-Wunused-variable]

To avoid the warning, this restructures the code to make it
both simpler and to move the 'i++' outside of the mutex_lock_nested
call, where it is now always used and the variable is not
flagged as unused.

We had some discussion about changing mutex_lock_nested to an
inline function, which would make the code do the right thing here,
but in the end decided against it, in order to guarantee that
mutex_lock_nested() does not introduce overhead without
CONFIG_DEBUG_LOCK_ALLOC.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 9f01cd4a915 ("regulator: core: introduce function to lock regulators and its supplies")
Link: http://permalink.gmane.org/gmane.linux.kernel/2068900
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 73b7683355cd..c70017d5f74b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -138,18 +138,10 @@ static bool have_full_constraints(void)
  */
 static void regulator_lock_supply(struct regulator_dev *rdev)
 {
-	struct regulator *supply;
-	int i = 0;
+	int i;
 
-	while (1) {
-		mutex_lock_nested(&rdev->mutex, i++);
-		supply = rdev->supply;
-
-		if (!rdev->supply)
-			return;
-
-		rdev = supply->rdev;
-	}
+	for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++)
+		mutex_lock_nested(&rdev->mutex, i);
 }
 
 /**

From b1999fa6e8145305a6c8bda30ea20783717708e6 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 2 Dec 2015 16:54:50 +0100
Subject: [PATCH 102/797] regulator: core: Fix nested locking of supplies

commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream.

Commit fa731ac7ea04 ("regulator: core: avoid unused variable warning")
introduced a subtle change in how supplies are locked. Where previously
code was always locking the regulator of the current iteration, the new
implementation only locks the regulator if it has a supply. For any
given power tree that means that the root will never get locked.

On the other hand the regulator_unlock_supply() will still release all
the locks, which in turn causes the lock debugging code to warn about a
mutex being unlocked which wasn't locked.

Cc: Mark Brown <broonie@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Fixes: fa731ac7ea04 ("regulator: core: avoid unused variable warning")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index c70017d5f74b..7b94b8ee087c 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,6 +132,14 @@ static bool have_full_constraints(void)
 	return has_full_constraints || of_have_populated_dt();
 }
 
+static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
+{
+	if (rdev && rdev->supply)
+		return rdev->supply->rdev;
+
+	return NULL;
+}
+
 /**
  * regulator_lock_supply - lock a regulator and its supplies
  * @rdev:         regulator source
@@ -140,7 +148,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++)
+	for (i = 0; rdev; rdev = rdev_get_supply(rdev), i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From e08f9a7c0e0242c2fae664d252e25c9bf93db522 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Nov 2015 15:25:23 +0100
Subject: [PATCH 103/797] ASoC: samsung: pass DMA channels as pointers

commit b9a1a743818ea3265abf98f9431623afa8c50c86 upstream.

ARM64 allmodconfig produces a bunch of warnings when building the
samsung ASoC code:

sound/soc/samsung/dmaengine.c: In function 'samsung_asoc_init_dma_data':
sound/soc/samsung/dmaengine.c:53:32: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
   playback_data->filter_data = (void *)playback->channel;
sound/soc/samsung/dmaengine.c:60:31: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
   capture_data->filter_data = (void *)capture->channel;

We could easily shut up the warning by adding an intermediate cast,
but there is a bigger underlying problem: The use of IORESOURCE_DMA
to pass data from platform code to device drivers is dubious to start
with, as what we really want is a pointer that can be passed into
a filter function.

Note that on s3c64xx, the pl08x DMA data is already a pointer, but
gets cast to resource_size_t so we can pass it as a resource, and it
then gets converted back to a pointer. In contrast, the data we pass
for s3c24xx is an index into a device specific table, and we artificially
convert that into a pointer for the filter function.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-s3c64xx/dev-audio.c        | 41 +++++++++++--------
 arch/arm/mach-s3c64xx/include/mach/dma.h | 52 ++++++++++++------------
 arch/arm/plat-samsung/devs.c             | 11 +++--
 include/linux/platform_data/asoc-s3c.h   |  4 ++
 sound/soc/samsung/ac97.c                 | 26 ++----------
 sound/soc/samsung/dma.h                  |  2 +-
 sound/soc/samsung/dmaengine.c            |  4 +-
 sound/soc/samsung/i2s.c                  | 26 +++---------
 sound/soc/samsung/pcm.c                  | 20 +++------
 sound/soc/samsung/s3c2412-i2s.c          |  4 +-
 sound/soc/samsung/s3c24xx-i2s.c          |  4 +-
 sound/soc/samsung/spdif.c                | 10 +----
 12 files changed, 84 insertions(+), 120 deletions(-)

diff --git a/arch/arm/mach-s3c64xx/dev-audio.c b/arch/arm/mach-s3c64xx/dev-audio.c
index ff780a8d8366..9a42736ef4ac 100644
--- a/arch/arm/mach-s3c64xx/dev-audio.c
+++ b/arch/arm/mach-s3c64xx/dev-audio.c
@@ -54,12 +54,12 @@ static int s3c64xx_i2s_cfg_gpio(struct platform_device *pdev)
 
 static struct resource s3c64xx_iis0_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C64XX_PA_IIS0, SZ_256),
-	[1] = DEFINE_RES_DMA(DMACH_I2S0_OUT),
-	[2] = DEFINE_RES_DMA(DMACH_I2S0_IN),
 };
 
-static struct s3c_audio_pdata i2sv3_pdata = {
+static struct s3c_audio_pdata i2s0_pdata = {
 	.cfg_gpio = s3c64xx_i2s_cfg_gpio,
+	.dma_playback = DMACH_I2S0_OUT,
+	.dma_capture = DMACH_I2S0_IN,
 };
 
 struct platform_device s3c64xx_device_iis0 = {
@@ -68,15 +68,19 @@ struct platform_device s3c64xx_device_iis0 = {
 	.num_resources	  = ARRAY_SIZE(s3c64xx_iis0_resource),
 	.resource	  = s3c64xx_iis0_resource,
 	.dev = {
-		.platform_data = &i2sv3_pdata,
+		.platform_data = &i2s0_pdata,
 	},
 };
 EXPORT_SYMBOL(s3c64xx_device_iis0);
 
 static struct resource s3c64xx_iis1_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C64XX_PA_IIS1, SZ_256),
-	[1] = DEFINE_RES_DMA(DMACH_I2S1_OUT),
-	[2] = DEFINE_RES_DMA(DMACH_I2S1_IN),
+};
+
+static struct s3c_audio_pdata i2s1_pdata = {
+	.cfg_gpio = s3c64xx_i2s_cfg_gpio,
+	.dma_playback = DMACH_I2S1_OUT,
+	.dma_capture = DMACH_I2S1_IN,
 };
 
 struct platform_device s3c64xx_device_iis1 = {
@@ -85,19 +89,19 @@ struct platform_device s3c64xx_device_iis1 = {
 	.num_resources	  = ARRAY_SIZE(s3c64xx_iis1_resource),
 	.resource	  = s3c64xx_iis1_resource,
 	.dev = {
-		.platform_data = &i2sv3_pdata,
+		.platform_data = &i2s1_pdata,
 	},
 };
 EXPORT_SYMBOL(s3c64xx_device_iis1);
 
 static struct resource s3c64xx_iisv4_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C64XX_PA_IISV4, SZ_256),
-	[1] = DEFINE_RES_DMA(DMACH_HSI_I2SV40_TX),
-	[2] = DEFINE_RES_DMA(DMACH_HSI_I2SV40_RX),
 };
 
 static struct s3c_audio_pdata i2sv4_pdata = {
 	.cfg_gpio = s3c64xx_i2s_cfg_gpio,
+	.dma_playback = DMACH_HSI_I2SV40_TX,
+	.dma_capture = DMACH_HSI_I2SV40_RX,
 	.type = {
 		.i2s = {
 			.quirks = QUIRK_PRI_6CHAN,
@@ -142,12 +146,12 @@ static int s3c64xx_pcm_cfg_gpio(struct platform_device *pdev)
 
 static struct resource s3c64xx_pcm0_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C64XX_PA_PCM0, SZ_256),
-	[1] = DEFINE_RES_DMA(DMACH_PCM0_TX),
-	[2] = DEFINE_RES_DMA(DMACH_PCM0_RX),
 };
 
 static struct s3c_audio_pdata s3c_pcm0_pdata = {
 	.cfg_gpio = s3c64xx_pcm_cfg_gpio,
+	.dma_capture = DMACH_PCM0_RX,
+	.dma_playback = DMACH_PCM0_TX,
 };
 
 struct platform_device s3c64xx_device_pcm0 = {
@@ -163,12 +167,12 @@ EXPORT_SYMBOL(s3c64xx_device_pcm0);
 
 static struct resource s3c64xx_pcm1_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C64XX_PA_PCM1, SZ_256),
-	[1] = DEFINE_RES_DMA(DMACH_PCM1_TX),
-	[2] = DEFINE_RES_DMA(DMACH_PCM1_RX),
 };
 
 static struct s3c_audio_pdata s3c_pcm1_pdata = {
 	.cfg_gpio = s3c64xx_pcm_cfg_gpio,
+	.dma_playback = DMACH_PCM1_TX,
+	.dma_capture = DMACH_PCM1_RX,
 };
 
 struct platform_device s3c64xx_device_pcm1 = {
@@ -196,13 +200,14 @@ static int s3c64xx_ac97_cfg_gpe(struct platform_device *pdev)
 
 static struct resource s3c64xx_ac97_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C64XX_PA_AC97, SZ_256),
-	[1] = DEFINE_RES_DMA(DMACH_AC97_PCMOUT),
-	[2] = DEFINE_RES_DMA(DMACH_AC97_PCMIN),
-	[3] = DEFINE_RES_DMA(DMACH_AC97_MICIN),
-	[4] = DEFINE_RES_IRQ(IRQ_AC97),
+	[1] = DEFINE_RES_IRQ(IRQ_AC97),
 };
 
-static struct s3c_audio_pdata s3c_ac97_pdata;
+static struct s3c_audio_pdata s3c_ac97_pdata = {
+	.dma_playback = DMACH_AC97_PCMOUT,
+	.dma_capture = DMACH_AC97_PCMIN,
+	.dma_capture_mic = DMACH_AC97_MICIN,
+};
 
 static u64 s3c64xx_ac97_dmamask = DMA_BIT_MASK(32);
 
diff --git a/arch/arm/mach-s3c64xx/include/mach/dma.h b/arch/arm/mach-s3c64xx/include/mach/dma.h
index 096e14073bd9..9c739eafe95c 100644
--- a/arch/arm/mach-s3c64xx/include/mach/dma.h
+++ b/arch/arm/mach-s3c64xx/include/mach/dma.h
@@ -14,38 +14,38 @@
 #define S3C64XX_DMA_CHAN(name)		((unsigned long)(name))
 
 /* DMA0/SDMA0 */
-#define DMACH_UART0		S3C64XX_DMA_CHAN("uart0_tx")
-#define DMACH_UART0_SRC2	S3C64XX_DMA_CHAN("uart0_rx")
-#define DMACH_UART1		S3C64XX_DMA_CHAN("uart1_tx")
-#define DMACH_UART1_SRC2	S3C64XX_DMA_CHAN("uart1_rx")
-#define DMACH_UART2		S3C64XX_DMA_CHAN("uart2_tx")
-#define DMACH_UART2_SRC2	S3C64XX_DMA_CHAN("uart2_rx")
-#define DMACH_UART3		S3C64XX_DMA_CHAN("uart3_tx")
-#define DMACH_UART3_SRC2	S3C64XX_DMA_CHAN("uart3_rx")
-#define DMACH_PCM0_TX		S3C64XX_DMA_CHAN("pcm0_tx")
-#define DMACH_PCM0_RX		S3C64XX_DMA_CHAN("pcm0_rx")
-#define DMACH_I2S0_OUT		S3C64XX_DMA_CHAN("i2s0_tx")
-#define DMACH_I2S0_IN		S3C64XX_DMA_CHAN("i2s0_rx")
+#define DMACH_UART0		"uart0_tx"
+#define DMACH_UART0_SRC2	"uart0_rx"
+#define DMACH_UART1		"uart1_tx"
+#define DMACH_UART1_SRC2	"uart1_rx"
+#define DMACH_UART2		"uart2_tx"
+#define DMACH_UART2_SRC2	"uart2_rx"
+#define DMACH_UART3		"uart3_tx"
+#define DMACH_UART3_SRC2	"uart3_rx"
+#define DMACH_PCM0_TX		"pcm0_tx"
+#define DMACH_PCM0_RX		"pcm0_rx"
+#define DMACH_I2S0_OUT		"i2s0_tx"
+#define DMACH_I2S0_IN		"i2s0_rx"
 #define DMACH_SPI0_TX		S3C64XX_DMA_CHAN("spi0_tx")
 #define DMACH_SPI0_RX		S3C64XX_DMA_CHAN("spi0_rx")
-#define DMACH_HSI_I2SV40_TX	S3C64XX_DMA_CHAN("i2s2_tx")
-#define DMACH_HSI_I2SV40_RX	S3C64XX_DMA_CHAN("i2s2_rx")
+#define DMACH_HSI_I2SV40_TX	"i2s2_tx"
+#define DMACH_HSI_I2SV40_RX	"i2s2_rx"
 
 /* DMA1/SDMA1 */
-#define DMACH_PCM1_TX		S3C64XX_DMA_CHAN("pcm1_tx")
-#define DMACH_PCM1_RX		S3C64XX_DMA_CHAN("pcm1_rx")
-#define DMACH_I2S1_OUT		S3C64XX_DMA_CHAN("i2s1_tx")
-#define DMACH_I2S1_IN		S3C64XX_DMA_CHAN("i2s1_rx")
+#define DMACH_PCM1_TX		"pcm1_tx"
+#define DMACH_PCM1_RX		"pcm1_rx"
+#define DMACH_I2S1_OUT		"i2s1_tx"
+#define DMACH_I2S1_IN		"i2s1_rx"
 #define DMACH_SPI1_TX		S3C64XX_DMA_CHAN("spi1_tx")
 #define DMACH_SPI1_RX		S3C64XX_DMA_CHAN("spi1_rx")
-#define DMACH_AC97_PCMOUT	S3C64XX_DMA_CHAN("ac97_out")
-#define DMACH_AC97_PCMIN	S3C64XX_DMA_CHAN("ac97_in")
-#define DMACH_AC97_MICIN	S3C64XX_DMA_CHAN("ac97_mic")
-#define DMACH_PWM		S3C64XX_DMA_CHAN("pwm")
-#define DMACH_IRDA		S3C64XX_DMA_CHAN("irda")
-#define DMACH_EXTERNAL		S3C64XX_DMA_CHAN("external")
-#define DMACH_SECURITY_RX	S3C64XX_DMA_CHAN("sec_rx")
-#define DMACH_SECURITY_TX	S3C64XX_DMA_CHAN("sec_tx")
+#define DMACH_AC97_PCMOUT	"ac97_out"
+#define DMACH_AC97_PCMIN	"ac97_in"
+#define DMACH_AC97_MICIN	"ac97_mic"
+#define DMACH_PWM		"pwm"
+#define DMACH_IRDA		"irda"
+#define DMACH_EXTERNAL		"external"
+#define DMACH_SECURITY_RX	"sec_rx"
+#define DMACH_SECURITY_TX	"sec_tx"
 
 enum dma_ch {
 	DMACH_MAX = 32
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 82074625de5c..e212f9d804bd 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -65,6 +65,7 @@
 #include <linux/platform_data/usb-ohci-s3c2410.h>
 #include <plat/usb-phy.h>
 #include <plat/regs-spi.h>
+#include <linux/platform_data/asoc-s3c.h>
 #include <linux/platform_data/spi-s3c64xx.h>
 
 static u64 samsung_device_dma_mask = DMA_BIT_MASK(32);
@@ -74,9 +75,12 @@ static u64 samsung_device_dma_mask = DMA_BIT_MASK(32);
 static struct resource s3c_ac97_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C2440_PA_AC97, S3C2440_SZ_AC97),
 	[1] = DEFINE_RES_IRQ(IRQ_S3C244X_AC97),
-	[2] = DEFINE_RES_DMA_NAMED(DMACH_PCM_OUT, "PCM out"),
-	[3] = DEFINE_RES_DMA_NAMED(DMACH_PCM_IN, "PCM in"),
-	[4] = DEFINE_RES_DMA_NAMED(DMACH_MIC_IN, "Mic in"),
+};
+
+static struct s3c_audio_pdata s3c_ac97_pdata = {
+	.dma_playback = (void *)DMACH_PCM_OUT,
+	.dma_capture = (void *)DMACH_PCM_IN,
+	.dma_capture_mic = (void *)DMACH_MIC_IN,
 };
 
 struct platform_device s3c_device_ac97 = {
@@ -87,6 +91,7 @@ struct platform_device s3c_device_ac97 = {
 	.dev		= {
 		.dma_mask		= &samsung_device_dma_mask,
 		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.platform_data		= &s3c_ac97_pdata,
 	}
 };
 #endif /* CONFIG_CPU_S3C2440 */
diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h
index 5e0bc779e6c5..33f88b4479e4 100644
--- a/include/linux/platform_data/asoc-s3c.h
+++ b/include/linux/platform_data/asoc-s3c.h
@@ -39,6 +39,10 @@ struct samsung_i2s {
  */
 struct s3c_audio_pdata {
 	int (*cfg_gpio)(struct platform_device *);
+	void *dma_playback;
+	void *dma_capture;
+	void *dma_play_sec;
+	void *dma_capture_mic;
 	union {
 		struct samsung_i2s i2s;
 	} type;
diff --git a/sound/soc/samsung/ac97.c b/sound/soc/samsung/ac97.c
index e4145509d63c..9c5219392460 100644
--- a/sound/soc/samsung/ac97.c
+++ b/sound/soc/samsung/ac97.c
@@ -324,7 +324,7 @@ static const struct snd_soc_component_driver s3c_ac97_component = {
 
 static int s3c_ac97_probe(struct platform_device *pdev)
 {
-	struct resource *mem_res, *dmatx_res, *dmarx_res, *dmamic_res, *irq_res;
+	struct resource *mem_res, *irq_res;
 	struct s3c_audio_pdata *ac97_pdata;
 	int ret;
 
@@ -335,24 +335,6 @@ static int s3c_ac97_probe(struct platform_device *pdev)
 	}
 
 	/* Check for availability of necessary resource */
-	dmatx_res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!dmatx_res) {
-		dev_err(&pdev->dev, "Unable to get AC97-TX dma resource\n");
-		return -ENXIO;
-	}
-
-	dmarx_res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!dmarx_res) {
-		dev_err(&pdev->dev, "Unable to get AC97-RX dma resource\n");
-		return -ENXIO;
-	}
-
-	dmamic_res = platform_get_resource(pdev, IORESOURCE_DMA, 2);
-	if (!dmamic_res) {
-		dev_err(&pdev->dev, "Unable to get AC97-MIC dma resource\n");
-		return -ENXIO;
-	}
-
 	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!irq_res) {
 		dev_err(&pdev->dev, "AC97 IRQ not provided!\n");
@@ -364,11 +346,11 @@ static int s3c_ac97_probe(struct platform_device *pdev)
 	if (IS_ERR(s3c_ac97.regs))
 		return PTR_ERR(s3c_ac97.regs);
 
-	s3c_ac97_pcm_out.channel = dmatx_res->start;
+	s3c_ac97_pcm_out.slave = ac97_pdata->dma_playback;
 	s3c_ac97_pcm_out.dma_addr = mem_res->start + S3C_AC97_PCM_DATA;
-	s3c_ac97_pcm_in.channel = dmarx_res->start;
+	s3c_ac97_pcm_in.slave = ac97_pdata->dma_capture;
 	s3c_ac97_pcm_in.dma_addr = mem_res->start + S3C_AC97_PCM_DATA;
-	s3c_ac97_mic_in.channel = dmamic_res->start;
+	s3c_ac97_mic_in.slave = ac97_pdata->dma_capture_mic;
 	s3c_ac97_mic_in.dma_addr = mem_res->start + S3C_AC97_MIC_DATA;
 
 	init_completion(&s3c_ac97.done);
diff --git a/sound/soc/samsung/dma.h b/sound/soc/samsung/dma.h
index 0e85dcfec023..085ef30f5ca2 100644
--- a/sound/soc/samsung/dma.h
+++ b/sound/soc/samsung/dma.h
@@ -15,7 +15,7 @@
 #include <sound/dmaengine_pcm.h>
 
 struct s3c_dma_params {
-	int channel;				/* Channel ID */
+	void *slave;				/* Channel ID */
 	dma_addr_t dma_addr;
 	int dma_size;			/* Size of the DMA transfer */
 	char *ch_name;
diff --git a/sound/soc/samsung/dmaengine.c b/sound/soc/samsung/dmaengine.c
index 506f5bf6d082..727008d57d14 100644
--- a/sound/soc/samsung/dmaengine.c
+++ b/sound/soc/samsung/dmaengine.c
@@ -50,14 +50,14 @@ void samsung_asoc_init_dma_data(struct snd_soc_dai *dai,
 
 	if (playback) {
 		playback_data = &playback->dma_data;
-		playback_data->filter_data = (void *)playback->channel;
+		playback_data->filter_data = playback->slave;
 		playback_data->chan_name = playback->ch_name;
 		playback_data->addr = playback->dma_addr;
 		playback_data->addr_width = playback->dma_size;
 	}
 	if (capture) {
 		capture_data = &capture->dma_data;
-		capture_data->filter_data = (void *)capture->channel;
+		capture_data->filter_data = capture->slave;
 		capture_data->chan_name = capture->ch_name;
 		capture_data->addr = capture->dma_addr;
 		capture_data->addr_width = capture->dma_size;
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 7dbf899b2af2..e163b0148c4b 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -1260,27 +1260,14 @@ static int samsung_i2s_probe(struct platform_device *pdev)
 	pri_dai->lock = &pri_dai->spinlock;
 
 	if (!np) {
-		res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-		if (!res) {
-			dev_err(&pdev->dev,
-				"Unable to get I2S-TX dma resource\n");
-			return -ENXIO;
-		}
-		pri_dai->dma_playback.channel = res->start;
-
-		res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-		if (!res) {
-			dev_err(&pdev->dev,
-				"Unable to get I2S-RX dma resource\n");
-			return -ENXIO;
-		}
-		pri_dai->dma_capture.channel = res->start;
-
 		if (i2s_pdata == NULL) {
 			dev_err(&pdev->dev, "Can't work without s3c_audio_pdata\n");
 			return -EINVAL;
 		}
 
+		pri_dai->dma_playback.slave = i2s_pdata->dma_playback;
+		pri_dai->dma_capture.slave = i2s_pdata->dma_capture;
+
 		if (&i2s_pdata->type)
 			i2s_cfg = &i2s_pdata->type.i2s;
 
@@ -1341,11 +1328,8 @@ static int samsung_i2s_probe(struct platform_device *pdev)
 		sec_dai->dma_playback.dma_addr = regs_base + I2STXDS;
 		sec_dai->dma_playback.ch_name = "tx-sec";
 
-		if (!np) {
-			res = platform_get_resource(pdev, IORESOURCE_DMA, 2);
-			if (res)
-				sec_dai->dma_playback.channel = res->start;
-		}
+		if (!np)
+			sec_dai->dma_playback.slave = i2s_pdata->dma_play_sec;
 
 		sec_dai->dma_playback.dma_size = 4;
 		sec_dai->addr = pri_dai->addr;
diff --git a/sound/soc/samsung/pcm.c b/sound/soc/samsung/pcm.c
index b320a9d3fbf8..c77f324e0bb8 100644
--- a/sound/soc/samsung/pcm.c
+++ b/sound/soc/samsung/pcm.c
@@ -486,7 +486,7 @@ static const struct snd_soc_component_driver s3c_pcm_component = {
 static int s3c_pcm_dev_probe(struct platform_device *pdev)
 {
 	struct s3c_pcm_info *pcm;
-	struct resource *mem_res, *dmatx_res, *dmarx_res;
+	struct resource *mem_res;
 	struct s3c_audio_pdata *pcm_pdata;
 	int ret;
 
@@ -499,18 +499,6 @@ static int s3c_pcm_dev_probe(struct platform_device *pdev)
 	pcm_pdata = pdev->dev.platform_data;
 
 	/* Check for availability of necessary resource */
-	dmatx_res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!dmatx_res) {
-		dev_err(&pdev->dev, "Unable to get PCM-TX dma resource\n");
-		return -ENXIO;
-	}
-
-	dmarx_res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!dmarx_res) {
-		dev_err(&pdev->dev, "Unable to get PCM-RX dma resource\n");
-		return -ENXIO;
-	}
-
 	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem_res) {
 		dev_err(&pdev->dev, "Unable to get register resource\n");
@@ -568,8 +556,10 @@ static int s3c_pcm_dev_probe(struct platform_device *pdev)
 	s3c_pcm_stereo_out[pdev->id].dma_addr = mem_res->start
 							+ S3C_PCM_TXFIFO;
 
-	s3c_pcm_stereo_in[pdev->id].channel = dmarx_res->start;
-	s3c_pcm_stereo_out[pdev->id].channel = dmatx_res->start;
+	if (pcm_pdata) {
+		s3c_pcm_stereo_in[pdev->id].slave = pcm_pdata->dma_capture;
+		s3c_pcm_stereo_out[pdev->id].slave = pcm_pdata->dma_playback;
+	}
 
 	pcm->dma_capture = &s3c_pcm_stereo_in[pdev->id];
 	pcm->dma_playback = &s3c_pcm_stereo_out[pdev->id];
diff --git a/sound/soc/samsung/s3c2412-i2s.c b/sound/soc/samsung/s3c2412-i2s.c
index 2b766d212ce0..77d27c85a32a 100644
--- a/sound/soc/samsung/s3c2412-i2s.c
+++ b/sound/soc/samsung/s3c2412-i2s.c
@@ -34,13 +34,13 @@
 #include "s3c2412-i2s.h"
 
 static struct s3c_dma_params s3c2412_i2s_pcm_stereo_out = {
-	.channel	= DMACH_I2S_OUT,
+	.slave		= (void *)(uintptr_t)DMACH_I2S_OUT,
 	.ch_name	= "tx",
 	.dma_size	= 4,
 };
 
 static struct s3c_dma_params s3c2412_i2s_pcm_stereo_in = {
-	.channel	= DMACH_I2S_IN,
+	.slave		= (void *)(uintptr_t)DMACH_I2S_IN,
 	.ch_name	= "rx",
 	.dma_size	= 4,
 };
diff --git a/sound/soc/samsung/s3c24xx-i2s.c b/sound/soc/samsung/s3c24xx-i2s.c
index 5bf723689692..9da3a77ea2c7 100644
--- a/sound/soc/samsung/s3c24xx-i2s.c
+++ b/sound/soc/samsung/s3c24xx-i2s.c
@@ -32,13 +32,13 @@
 #include "s3c24xx-i2s.h"
 
 static struct s3c_dma_params s3c24xx_i2s_pcm_stereo_out = {
-	.channel	= DMACH_I2S_OUT,
+	.slave		= (void *)(uintptr_t)DMACH_I2S_OUT,
 	.ch_name	= "tx",
 	.dma_size	= 2,
 };
 
 static struct s3c_dma_params s3c24xx_i2s_pcm_stereo_in = {
-	.channel	= DMACH_I2S_IN,
+	.slave		= (void *)(uintptr_t)DMACH_I2S_IN,
 	.ch_name	= "rx",
 	.dma_size	= 2,
 };
diff --git a/sound/soc/samsung/spdif.c b/sound/soc/samsung/spdif.c
index 36dbc0e96004..9dd7ee6d03ff 100644
--- a/sound/soc/samsung/spdif.c
+++ b/sound/soc/samsung/spdif.c
@@ -359,7 +359,7 @@ static const struct snd_soc_component_driver samsung_spdif_component = {
 static int spdif_probe(struct platform_device *pdev)
 {
 	struct s3c_audio_pdata *spdif_pdata;
-	struct resource *mem_res, *dma_res;
+	struct resource *mem_res;
 	struct samsung_spdif_info *spdif;
 	int ret;
 
@@ -367,12 +367,6 @@ static int spdif_probe(struct platform_device *pdev)
 
 	dev_dbg(&pdev->dev, "Entered %s\n", __func__);
 
-	dma_res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!dma_res) {
-		dev_err(&pdev->dev, "Unable to get dma resource.\n");
-		return -ENXIO;
-	}
-
 	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem_res) {
 		dev_err(&pdev->dev, "Unable to get register resource.\n");
@@ -432,7 +426,7 @@ static int spdif_probe(struct platform_device *pdev)
 
 	spdif_stereo_out.dma_size = 2;
 	spdif_stereo_out.dma_addr = mem_res->start + DATA_OUTBUF;
-	spdif_stereo_out.channel = dma_res->start;
+	spdif_stereo_out.slave = spdif_pdata ? spdif_pdata->dma_playback : NULL;
 
 	spdif->dma_playback = &spdif_stereo_out;
 

From c045105c641ccbeb6e94e87980cc8db870aa3961 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Nov 2015 17:08:41 +0100
Subject: [PATCH 104/797] mmc: sh_mmcif: rework dma channel handling

commit 27cbd7e815a8e223ff7c4fe56daca724101288ac upstream.

When compiling the sh_mmcif driver for ARM64, we currently
get a harmless build warning:

../drivers/mmc/host/sh_mmcif.c: In function 'sh_mmcif_request_dma_one':
../drivers/mmc/host/sh_mmcif.c:417:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    (void *)pdata->slave_id_tx :
    ^
../drivers/mmc/host/sh_mmcif.c:418:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    (void *)pdata->slave_id_rx;

This could be worked around by adding another cast to uintptr_t, but
I decided to simplify the code a little more to avoid that. This
splits out the platform data using code into a separate function
and builds that only for CONFIG_SUPERH. This part still has a typecast
but does not need a second one. The SH platform code could be further
modified to pass a pointer directly as we do on other architectures
when we have a filter function.

The normal case is simplified further and now just calls
dma_request_slave_channel() directly without going through the
compat handling.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sh_mmcif.c | 88 +++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 48 deletions(-)

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index ad9ffea7d659..1ca8a1359cbc 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -397,38 +397,26 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
 }
 
 static struct dma_chan *
-sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
-			 struct sh_mmcif_plat_data *pdata,
-			 enum dma_transfer_direction direction)
+sh_mmcif_request_dma_pdata(struct sh_mmcif_host *host, uintptr_t slave_id)
 {
-	struct dma_slave_config cfg = { 0, };
-	struct dma_chan *chan;
-	void *slave_data = NULL;
-	struct resource *res;
-	struct device *dev = sh_mmcif_host_to_dev(host);
 	dma_cap_mask_t mask;
-	int ret;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
-
-	if (pdata)
-		slave_data = direction == DMA_MEM_TO_DEV ?
-			(void *)pdata->slave_id_tx :
-			(void *)pdata->slave_id_rx;
-
-	chan = dma_request_slave_channel_compat(mask, shdma_chan_filter,
-				slave_data, dev,
-				direction == DMA_MEM_TO_DEV ? "tx" : "rx");
-
-	dev_dbg(dev, "%s: %s: got channel %p\n", __func__,
-		direction == DMA_MEM_TO_DEV ? "TX" : "RX", chan);
-
-	if (!chan)
+	if (slave_id <= 0)
 		return NULL;
 
-	res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
+	return dma_request_channel(mask, shdma_chan_filter, (void *)slave_id);
+}
 
+static int sh_mmcif_dma_slave_config(struct sh_mmcif_host *host,
+				     struct dma_chan *chan,
+				     enum dma_transfer_direction direction)
+{
+	struct resource *res;
+	struct dma_slave_config cfg = { 0, };
+
+	res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
 	cfg.direction = direction;
 
 	if (direction == DMA_DEV_TO_MEM) {
@@ -439,38 +427,42 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
 		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 	}
 
-	ret = dmaengine_slave_config(chan, &cfg);
-	if (ret < 0) {
-		dma_release_channel(chan);
-		return NULL;
-	}
-
-	return chan;
+	return dmaengine_slave_config(chan, &cfg);
 }
 
-static void sh_mmcif_request_dma(struct sh_mmcif_host *host,
-				 struct sh_mmcif_plat_data *pdata)
+static void sh_mmcif_request_dma(struct sh_mmcif_host *host)
 {
 	struct device *dev = sh_mmcif_host_to_dev(host);
 	host->dma_active = false;
 
-	if (pdata) {
-		if (pdata->slave_id_tx <= 0 || pdata->slave_id_rx <= 0)
-			return;
-	} else if (!dev->of_node) {
-		return;
-	}
-
 	/* We can only either use DMA for both Tx and Rx or not use it at all */
-	host->chan_tx = sh_mmcif_request_dma_one(host, pdata, DMA_MEM_TO_DEV);
-	if (!host->chan_tx)
-		return;
+	if (IS_ENABLED(CONFIG_SUPERH) && dev->platform_data) {
+		struct sh_mmcif_plat_data *pdata = dev->platform_data;
 
-	host->chan_rx = sh_mmcif_request_dma_one(host, pdata, DMA_DEV_TO_MEM);
-	if (!host->chan_rx) {
-		dma_release_channel(host->chan_tx);
-		host->chan_tx = NULL;
+		host->chan_tx = sh_mmcif_request_dma_pdata(host,
+							pdata->slave_id_tx);
+		host->chan_rx = sh_mmcif_request_dma_pdata(host,
+							pdata->slave_id_rx);
+	} else {
+		host->chan_tx = dma_request_slave_channel(dev, "tx");
+		host->chan_tx = dma_request_slave_channel(dev, "rx");
 	}
+	dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx,
+		host->chan_rx);
+
+	if (!host->chan_tx || !host->chan_rx ||
+	    sh_mmcif_dma_slave_config(host, host->chan_tx, DMA_MEM_TO_DEV) ||
+	    sh_mmcif_dma_slave_config(host, host->chan_rx, DMA_DEV_TO_MEM))
+		goto error;
+
+	return;
+
+error:
+	if (host->chan_tx)
+		dma_release_channel(host->chan_tx);
+	if (host->chan_rx)
+		dma_release_channel(host->chan_rx);
+	host->chan_tx = host->chan_rx = NULL;
 }
 
 static void sh_mmcif_release_dma(struct sh_mmcif_host *host)
@@ -1102,7 +1094,7 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (ios->power_mode == MMC_POWER_UP) {
 		if (!host->card_present) {
 			/* See if we also get DMA */
-			sh_mmcif_request_dma(host, dev->platform_data);
+			sh_mmcif_request_dma(host);
 			host->card_present = true;
 		}
 		sh_mmcif_set_power(host, ios);

From 19e0783ae96837e30e94acdb0cc4ae935338a969 Mon Sep 17 00:00:00 2001
From: Chris Paterson <chris.paterson2@renesas.com>
Date: Wed, 10 Feb 2016 14:07:01 +0000
Subject: [PATCH 105/797] mmc: sh_mmcif: Correct TX DMA channel allocation

commit a32ef81c9889c9554a3c4b465c4ee7b2d26c6b10 upstream.

Commit 27cbd7e815a8 ("mmc: sh_mmcif: rework dma channel handling")
introduced a typo causing the TX DMA channel allocation to be overwritten
by the requested RX DMA channel.

Fixes: 27cbd7e815a8 ("mmc: sh_mmcif: rework dma channel handling")
Signed-off-by: Chris Paterson <chris.paterson2@renesas.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sh_mmcif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 1ca8a1359cbc..6234eab38ff3 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -445,7 +445,7 @@ static void sh_mmcif_request_dma(struct sh_mmcif_host *host)
 							pdata->slave_id_rx);
 	} else {
 		host->chan_tx = dma_request_slave_channel(dev, "tx");
-		host->chan_tx = dma_request_slave_channel(dev, "rx");
+		host->chan_rx = dma_request_slave_channel(dev, "rx");
 	}
 	dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx,
 		host->chan_rx);

From e8c28e096a07b7bdf6fae534c8ff5f372b25ea82 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 3 Feb 2016 12:33:30 +0100
Subject: [PATCH 106/797] x86/microcode/intel: Make early loader look for
 builtin microcode too

commit 264285ac01673e70557c43ecee338ce97c4c0672 upstream.

Set the initrd @start depending on the presence of an initrd. Otherwise,
builtin microcode loading doesn't work as the start is wrong and we're
using it to compute offset to the microcode blobs.

Tested-by: Thomas Voegtle <tv@lio96.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1454499225-21544-3-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/microcode/intel.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce47402eb2f9..5e3a310d54f3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -555,10 +555,14 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
 	cd.data = NULL;
 	cd.size = 0;
 
-	cd = find_cpio_data(p, (void *)start, size, &offset);
-	if (!cd.data) {
+	/* try built-in microcode if no initrd */
+	if (!size) {
 		if (!load_builtin_intel_microcode(&cd))
 			return UCODE_ERROR;
+	} else {
+		cd = find_cpio_data(p, (void *)start, size, &offset);
+		if (!cd.data)
+			return UCODE_ERROR;
 	}
 
 	return get_matching_model_microcode(0, start, cd.data, cd.size,
@@ -732,16 +736,20 @@ void __init load_ucode_intel_bsp(void)
 	struct boot_params *p;
 
 	p	= (struct boot_params *)__pa_nodebug(&boot_params);
-	start	= p->hdr.ramdisk_image;
 	size	= p->hdr.ramdisk_size;
 
-	_load_ucode_intel_bsp(
-			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-			start, size);
+	/*
+	 * Set start only if we have an initrd image. We cannot use initrd_start
+	 * because it is not set that early yet.
+	 */
+	start	= (size ? p->hdr.ramdisk_image : 0);
+
+	_load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+			      (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+			      start, size);
 #else
-	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
 	size	= boot_params.hdr.ramdisk_size;
+	start	= (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0);
 
 	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
 #endif

From 5aeaf8bd6c6a3f29a1ab55987c10741ef984ea52 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 3 Feb 2016 12:33:29 +0100
Subject: [PATCH 107/797] x86/microcode: Untangle from BLK_DEV_INITRD

commit 5f9c01aa7c49a2d74474d6d879a797b8badf29e6 upstream.

Thomas Voegtle reported that doing oldconfig with a .config which has
CONFIG_MICROCODE enabled but BLK_DEV_INITRD disabled prevents the
microcode loading mechanism from being built.

So untangle it from the BLK_DEV_INITRD dependency so that oldconfig
doesn't turn it off and add an explanatory text to its Kconfig help what
the supported methods for supplying microcode are.

Reported-by: Thomas Voegtle <tv@lio96.de>
Tested-by: Thomas Voegtle <tv@lio96.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1454499225-21544-2-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig                      | 23 ++++++++++++-----------
 arch/x86/include/asm/microcode.h      | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/microcode/intel.c | 14 ++++----------
 3 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db3622f22b61..436639a31624 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1126,22 +1126,23 @@ config MICROCODE
 	bool "CPU microcode loading support"
 	default y
 	depends on CPU_SUP_AMD || CPU_SUP_INTEL
-	depends on BLK_DEV_INITRD
 	select FW_LOADER
 	---help---
-
 	  If you say Y here, you will be able to update the microcode on
-	  certain Intel and AMD processors. The Intel support is for the
-	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
-	  Xeon etc. The AMD support is for families 0x10 and later. You will
-	  obviously need the actual microcode binary data itself which is not
-	  shipped with the Linux kernel.
+	  Intel and AMD processors. The Intel support is for the IA32 family,
+	  e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
+	  AMD support is for families 0x10 and later. You will obviously need
+	  the actual microcode binary data itself which is not shipped with
+	  the Linux kernel.
 
-	  This option selects the general module only, you need to select
-	  at least one vendor specific module as well.
+	  The preferred method to load microcode from a detached initrd is described
+	  in Documentation/x86/early-microcode.txt. For that you need to enable
+	  CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
+	  initrd for microcode blobs.
 
-	  To compile this driver as a module, choose M here: the module
-	  will be called microcode.
+	  In addition, you can build-in the microcode into the kernel. For that you
+	  need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
+	  to the CONFIG_EXTRA_FIRMWARE config option.
 
 config MICROCODE_INTEL
 	bool "Intel microcode loading support"
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 34e62b1dcfce..712b24ed3a64 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_MICROCODE_H
 
 #include <linux/earlycpio.h>
+#include <linux/initrd.h>
 
 #define native_rdmsr(msr, val1, val2)			\
 do {							\
@@ -168,4 +169,29 @@ static inline void reload_early_microcode(void)			{ }
 static inline bool
 get_builtin_firmware(struct cpio_data *cd, const char *name)	{ return false; }
 #endif
+
+static inline unsigned long get_initrd_start(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	return initrd_start;
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned long get_initrd_start_addr(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_X86_32
+	unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+
+	return (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+	return get_initrd_start();
+#endif
+#else /* CONFIG_BLK_DEV_INITRD */
+	return 0;
+#endif
+}
+
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 5e3a310d54f3..ac8975a65280 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -698,7 +698,7 @@ int __init save_microcode_in_initrd_intel(void)
 	if (count == 0)
 		return ret;
 
-	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, get_initrd_start(), count);
 	ret = save_microcode(&mc_saved_data, mc_saved, count);
 	if (ret)
 		pr_err("Cannot save microcode patches from initrd.\n");
@@ -760,20 +760,14 @@ void load_ucode_intel_ap(void)
 	struct mc_saved_data *mc_saved_data_p;
 	struct ucode_cpu_info uci;
 	unsigned long *mc_saved_in_initrd_p;
-	unsigned long initrd_start_addr;
 	enum ucode_state ret;
 #ifdef CONFIG_X86_32
-	unsigned long *initrd_start_p;
 
-	mc_saved_in_initrd_p =
-		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
+	mc_saved_in_initrd_p = (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
 	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
-	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
-	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
 #else
-	mc_saved_data_p = &mc_saved_data;
 	mc_saved_in_initrd_p = mc_saved_in_initrd;
-	initrd_start_addr = initrd_start;
+	mc_saved_data_p = &mc_saved_data;
 #endif
 
 	/*
@@ -785,7 +779,7 @@ void load_ucode_intel_ap(void)
 
 	collect_cpu_info_early(&uci);
 	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-			     initrd_start_addr, &uci);
+			     get_initrd_start_addr(), &uci);
 
 	if (ret != UCODE_OK)
 		return;

From 7657a398c105500d1614444a26c1fc9166813fb8 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 10 Feb 2016 14:15:27 -0800
Subject: [PATCH 108/797] x86/entry/compat: Keep TS_COMPAT set during signal
 delivery

commit 4e79e182b419172e35936a47f098509092d69817 upstream.

Signal delivery needs to know the sign of an interrupted syscall's
return value in order to detect -ERESTART variants.  Normally this
works independently of bitness because syscalls internally return
long.  Under ptrace, however, this can break, and syscall_get_error
is supposed to sign-extend regs->ax if needed.

We were clearing TS_COMPAT too early, though, and this prevented
sign extension, which subtly broke syscall restart under ptrace.

Reported-by: Robert O'Callahan <robert@ocallahan.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: c5c46f59e4e7 ("x86/entry: Add new, comprehensible entry and exit handlers written in C")
Link: http://lkml.kernel.org/r/cbce3cf545522f64eb37f5478cb59746230db3b5.1455142412.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/entry/common.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 03663740c866..1a4477cedc49 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -268,6 +268,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 /* Called with IRQs disabled. */
 __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 {
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	u32 cached_flags;
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +276,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 
 	lockdep_sys_exit();
 
-	cached_flags =
-		READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+	cached_flags = READ_ONCE(ti->flags);
 
 	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 		exit_to_usermode_loop(regs, cached_flags);
 
+#ifdef CONFIG_COMPAT
+	/*
+	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
+	 * returning to user mode.  We need to clear it *after* signal
+	 * handling, because syscall restart has a fixup for compat
+	 * syscalls.  The fixup is exercised by the ptrace_syscall_32
+	 * selftest.
+	 */
+	ti->status &= ~TS_COMPAT;
+#endif
+
 	user_enter();
 }
 
@@ -332,14 +343,6 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 	if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
 		syscall_slow_exit_work(regs, cached_flags);
 
-#ifdef CONFIG_COMPAT
-	/*
-	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
-	 * returning to user mode.
-	 */
-	ti->status &= ~TS_COMPAT;
-#endif
-
 	local_irq_disable();
 	prepare_exit_to_usermode(regs);
 }

From f6724209df88ec1a760d036b71b8f871b1556785 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 3 Mar 2016 20:50:40 +0100
Subject: [PATCH 109/797] perf/x86/intel: Add definition for PT PMI bit

commit 5690ae28e472d25e330ad0c637a5cea3fc39fb32 upstream.

This patch adds a definition for GLOBAL_OVFL_STATUS bit 55
which is used with the Processor Trace (PT) feature.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/1457034642-21837-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7bcb861a04e5..5a2ed3ed2f26 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -165,6 +165,7 @@ struct x86_pmu_capability {
 #define GLOBAL_STATUS_ASIF				BIT_ULL(60)
 #define GLOBAL_STATUS_COUNTERS_FROZEN			BIT_ULL(59)
 #define GLOBAL_STATUS_LBRS_FROZEN			BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI			BIT_ULL(55)
 
 /*
  * IBS cpuid feature detection

From 54fda475686cfb9a90bb558c0fa590e644503432 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 26 Feb 2016 09:15:11 -0600
Subject: [PATCH 110/797] x86/PCI: Mark Broadwell-EP Home Agent & PCU as having
 non-compliant BARs

commit b894157145e4ac7598d7062bc93320898a5e059e upstream.

The Home Agent and PCU PCI devices in Broadwell-EP have a non-BAR register
where a BAR should be.  We don't know what the side effects of sizing the
"BAR" would be, and we don't know what address space the "BAR" might appear
to describe.

Mark these devices as having non-compliant BARs so the PCI core doesn't
touch them.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/pci/fixup.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index e58565556703..0ae7e9fa348d 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -540,3 +540,10 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev)
         }
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
+
+static void pci_bdwep_bar(struct pci_dev *dev)
+{
+	dev->non_compliant_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);

From 0f6e5e26e68f1171b7cf8bebb7ce86c95e506639 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 10 Feb 2016 17:50:23 +0100
Subject: [PATCH 111/797] KVM: x86: fix missed hardware breakpoints

commit 4e422bdd2f849d98fffccbc3295c2f0996097fb3 upstream.

Sometimes when setting a breakpoint a process doesn't stop on it.
This is because the debug registers are not loaded correctly on
VCPU load.

The following simple reproducer from Oleg Nesterov tries using debug
registers in both the host and the guest, for example by running "./bp
0 1" on the host and "./bp 14 15" under QEMU.

    #include <unistd.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <sys/wait.h>
    #include <sys/ptrace.h>
    #include <sys/user.h>
    #include <asm/debugreg.h>
    #include <assert.h>

    #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

    unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
    {
        unsigned long dr7;

        dr7 = ((len | type) & 0xf)
            << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
        if (enable)
            dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

        return dr7;
    }

    int write_dr(int pid, int dr, unsigned long val)
    {
        return ptrace(PTRACE_POKEUSER, pid,
                offsetof (struct user, u_debugreg[dr]),
                val);
    }

    void set_bp(pid_t pid, void *addr)
    {
        unsigned long dr7;
        assert(write_dr(pid, 0, (long)addr) == 0);
        dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
        assert(write_dr(pid, 7, dr7) == 0);
    }

    void *get_rip(int pid)
    {
        return (void*)ptrace(PTRACE_PEEKUSER, pid,
                offsetof(struct user, regs.rip), 0);
    }

    void test(int nr)
    {
        void *bp_addr = &&label + nr, *bp_hit;
        int pid;

        printf("test bp %d\n", nr);
        assert(nr < 16); // see 16 asm nops below

        pid = fork();
        if (!pid) {
            assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
            kill(getpid(), SIGSTOP);
            for (;;) {
                label: asm (
                    "nop; nop; nop; nop;"
                    "nop; nop; nop; nop;"
                    "nop; nop; nop; nop;"
                    "nop; nop; nop; nop;"
                );
            }
        }

        assert(pid == wait(NULL));
        set_bp(pid, bp_addr);

        for (;;) {
            assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0);
            assert(pid == wait(NULL));

            bp_hit = get_rip(pid);
            if (bp_hit != bp_addr)
                fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n",
                    bp_hit - &&label, nr);
        }
    }

    int main(int argc, const char *argv[])
    {
        while (--argc) {
            int nr = atoi(*++argv);
            if (!fork())
                test(nr);
        }

        while (wait(NULL) > 0)
            ;
        return 0;
    }

Suggested-by: Nadav Amit <namit@cs.technion.ac.il>
Reported-by: Andrey Wagin <avagin@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d2945024ed33..8bfc5fc6a39b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2736,6 +2736,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+	vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)

From 49f0cbfc3e73108c319c3d3cc5fe04587d96b654 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Wed, 2 Mar 2016 22:56:38 +0100
Subject: [PATCH 112/797] KVM: i8254: change PIT discard tick policy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7dd0fdff145c5be7146d0ac06732ae3613412ac1 upstream.

Discard policy uses ack_notifiers to prevent injection of PIT interrupts
before EOI from the last one.

This patch changes the policy to always try to deliver the interrupt,
which makes a difference when its vector is in ISR.
The old implementation would drop the interrupt, but the proposed one
injects it into IRR, like real hardware would.

The old policy breaks legacy NMI watchdogs, where PIT is used through
virtual wire (LVT0): PIT never sends an interrupt before receiving EOI,
thus a guest deadlock with disabled interrupts will stop NMIs.

Note that NMI doesn't do EOI, so PIT also had to send a normal interrupt
through IOAPIC.  (KVM's PIT is deeply rotten and luckily not used much
in modern systems.)

Even though there is a chance of regressions, I think we can fix the
LVT0 NMI bug without introducing a new tick policy.

Reported-by: Yuki Shibuya <shibuya.yk@ncos.nec.co.jp>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/i8254.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b0ea42b78ccd..ab5318727579 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -245,7 +245,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 		 * PIC is being reset.  Handle it gracefully here
 		 */
 		atomic_inc(&ps->pending);
-	else if (value > 0)
+	else if (value > 0 && ps->reinject)
 		/* in this case, we had multiple outstanding pit interrupts
 		 * that we needed to inject.  Reinject
 		 */
@@ -288,7 +288,9 @@ static void pit_do_work(struct kthread_work *work)
 	 * last one has been acked.
 	 */
 	spin_lock(&ps->inject_lock);
-	if (ps->irq_ack) {
+	if (!ps->reinject)
+		inject = 1;
+	else if (ps->irq_ack) {
 		ps->irq_ack = 0;
 		inject = 1;
 	}
@@ -317,10 +319,10 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 	struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
 	struct kvm_pit *pt = ps->kvm->arch.vpit;
 
-	if (ps->reinject || !atomic_read(&ps->pending)) {
+	if (ps->reinject)
 		atomic_inc(&ps->pending);
-		queue_kthread_work(&pt->worker, &pt->expired);
-	}
+
+	queue_kthread_work(&pt->worker, &pt->expired);
 
 	if (ps->is_periodic) {
 		hrtimer_add_expires_ns(&ps->timer, ps->period);

From 4e2fa4bbbac1c2e198e4c980d451c9ec568ae798 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 21 Mar 2016 10:15:25 +0100
Subject: [PATCH 113/797] KVM: fix spin_lock_init order on x86

commit e9ad4ec8379ad1ba6f68b8ca1c26b50b5ae0a327 upstream.

Moving the initialization earlier is needed in 4.6 because
kvm_arch_init_vm is now using mmu_lock, causing lockdep to
complain:

[  284.440294] INFO: trying to register non-static key.
[  284.445259] the code is fine but needs lockdep annotation.
[  284.450736] turning off the locking correctness validator.
...
[  284.528318]  [<ffffffff810aecc3>] lock_acquire+0xd3/0x240
[  284.533733]  [<ffffffffa0305aa0>] ? kvm_page_track_register_notifier+0x20/0x60 [kvm]
[  284.541467]  [<ffffffff81715581>] _raw_spin_lock+0x41/0x80
[  284.546960]  [<ffffffffa0305aa0>] ? kvm_page_track_register_notifier+0x20/0x60 [kvm]
[  284.554707]  [<ffffffffa0305aa0>] kvm_page_track_register_notifier+0x20/0x60 [kvm]
[  284.562281]  [<ffffffffa02ece70>] kvm_mmu_init_vm+0x20/0x30 [kvm]
[  284.568381]  [<ffffffffa02dbf7a>] kvm_arch_init_vm+0x1ea/0x200 [kvm]
[  284.574740]  [<ffffffffa02bff3f>] kvm_dev_ioctl+0xbf/0x4d0 [kvm]

However, it also helps fix a preexisting problem, which is why this
patch is also good for stable kernels: kvm_create_vm was incrementing
current->mm->mm_count but not decrementing it at the out_err label (in
case kvm_init_mmu_notifier failed).  The new initialization order makes
it possible to add the required mmdrop without adding a new error label.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/kvm_main.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7338e30421d8..fefbf2d148ef 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -547,6 +547,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	spin_lock_init(&kvm->mmu_lock);
+	atomic_inc(&current->mm->mm_count);
+	kvm->mm = current->mm;
+	kvm_eventfd_init(kvm);
+	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
+	mutex_init(&kvm->slots_lock);
+	atomic_set(&kvm->users_count, 1);
+	INIT_LIST_HEAD(&kvm->devices);
+
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
 		goto out_err_no_disable;
@@ -579,16 +589,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
 			goto out_err;
 	}
 
-	spin_lock_init(&kvm->mmu_lock);
-	kvm->mm = current->mm;
-	atomic_inc(&kvm->mm->mm_count);
-	kvm_eventfd_init(kvm);
-	mutex_init(&kvm->lock);
-	mutex_init(&kvm->irq_lock);
-	mutex_init(&kvm->slots_lock);
-	atomic_set(&kvm->users_count, 1);
-	INIT_LIST_HEAD(&kvm->devices);
-
 	r = kvm_init_mmu_notifier(kvm);
 	if (r)
 		goto out_err;
@@ -613,6 +613,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 		kvm_free_memslots(kvm, kvm->memslots[i]);
 	kvm_arch_free_vm(kvm);
+	mmdrop(current->mm);
 	return ERR_PTR(r);
 }
 

From c44b175bf03cd74e517f3c98b2cb4896e04202ae Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Mar 2016 16:53:29 +0100
Subject: [PATCH 114/797] KVM: VMX: avoid guest hang on invalid invept
 instruction

commit 2849eb4f99d54925c543db12917127f88b3c38ff upstream.

A guest executing an invalid invept instruction would hang
because the instruction pointer was not updated.

Fixes: bfd0a56b90005f8c8a004baf407ad90045c2b11e
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0958fa2b7cb7..89d5e02b14ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7340,6 +7340,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	if (!(types & (1UL << type))) {
 		nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		skip_emulated_instruction(vcpu);
 		return 1;
 	}
 

From f9153f95f2b5a1a90b81c746342bed9d40dc9ae0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Mar 2016 16:53:42 +0100
Subject: [PATCH 115/797] KVM: VMX: avoid guest hang on invalid invvpid
 instruction

commit f6870ee9e53430f2a318ccf0dd5e66bb46194e43 upstream.

A guest executing an invalid invvpid instruction would hang
because the instruction pointer was not updated.

Reported-by: jmontleo@redhat.com
Tested-by: jmontleo@redhat.com
Fixes: 99b83ac893b84ed1a62ad6d1f2b6cc32026b9e85
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 89d5e02b14ae..75d5d5b75e1f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7399,6 +7399,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	if (!(types & (1UL << type))) {
 		nested_vmx_failValid(vcpu,
 			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		skip_emulated_instruction(vcpu);
 		return 1;
 	}
 

From 6a84dfcbf56eab7955b607e22696cb145e019f20 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Mar 2016 16:58:38 +0100
Subject: [PATCH 116/797] KVM: VMX: fix nested vpid for old KVM guests

commit ef697a712a6165aea7779c295604b099e8bfae2e upstream.

Old KVM guests invoke single-context invvpid without actually checking
whether it is supported.  This was fixed by commit 518c8ae ("KVM: VMX:
Make sure single type invvpid is supported before issuing invvpid
instruction", 2010-08-01) and the patch after it, but pre-2.6.36
kernels, including RHEL 6, lack the fix.

Reported-by: jmontleo@redhat.com
Tested-by: jmontleo@redhat.com
Fixes: 99b83ac893b84ed1a62ad6d1f2b6cc32026b9e85
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 75d5d5b75e1f..f34ab71dfd57 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2637,8 +2637,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
+	/*
+	 * Old versions of KVM use the single-context version without
+	 * checking for support, so declare that it is supported even
+	 * though it is treated as global context.  The alternative is
+	 * not failing the single-context invvpid, and it is worse.
+	 */
 	if (enable_vpid)
 		vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+				VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
 				VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
 	else
 		vmx->nested.nested_vmx_vpid_caps = 0;
@@ -7416,12 +7423,17 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	}
 
 	switch (type) {
+	case VMX_VPID_EXTENT_SINGLE_CONTEXT:
+		/*
+		 * Old versions of KVM use the single-context version so we
+		 * have to support it; just treat it the same as all-context.
+		 */
 	case VMX_VPID_EXTENT_ALL_CONTEXT:
 		__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
 		nested_vmx_succeed(vcpu);
 		break;
 	default:
-		/* Trap single context invalidation invvpid calls */
+		/* Trap individual address invalidation invvpid calls */
 		BUG_ON(1);
 		break;
 	}

From 37014e0c5c90e250892da8aba8533cd43bacb4eb Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 2 Mar 2016 13:24:14 +0200
Subject: [PATCH 117/797] perf/core: Fix perf_sched_count derailment

commit 927a5570855836e5d5859a80ce7e91e963545e8f upstream.

The error path in perf_event_open() is such that asking for a sampling
event on a PMU that doesn't generate interrupts will end up in dropping
the perf_sched_count even though it hasn't been incremented for this
event yet.

Given a sufficient amount of these calls, we'll end up disabling
scheduler's jump label even though we'd still have active events in the
system, thereby facilitating the arrival of the infernal regions upon us.

I'm fixing this by moving account_event() inside perf_event_alloc().

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/1456917854-29427-1-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1087bbeb152b..faf2067fc8e2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7979,6 +7979,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		}
 	}
 
+	/* symmetric to unaccount_event() in _free_event() */
+	account_event(event);
+
 	return event;
 
 err_per_task:
@@ -8342,8 +8345,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
-	account_event(event);
-
 	/*
 	 * Special case software events and allow them to be part of
 	 * any hardware group.
@@ -8626,8 +8627,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	/* Mark owner so we could distinguish it from user events. */
 	event->owner = EVENT_OWNER_KERNEL;
 
-	account_event(event);
-
 	ctx = find_get_context(event->pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);

From 2dfe91df33715503b253563d5e6d9a816758485c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 17 Feb 2016 14:44:55 -0800
Subject: [PATCH 118/797] perf tools: Dont stop PMU parsing on alias parse
 error

commit 940db6dcd3f4659303fdf6befe7416adc4d24118 upstream.

When an error happens during alias parsing, the complete parsing of
all attributes of the PMU is currently stopped. This breaks old perf
on a newer kernel that may have not-yet-known alias attributes (such
as .scale or .per-pkg).

Continue when some attribute is unparseable.

This is IMHO a stable candidate and should be backported to older
versions to avoid problems with newer kernels.

v2: Print warnings when something goes wrong.
v3: Change warning to debug output

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1455749095-18358-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/pmu.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e4b173dec4b9..6f2a0279476c 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -283,13 +283,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
 {
 	struct dirent *evt_ent;
 	DIR *event_dir;
-	int ret = 0;
 
 	event_dir = opendir(dir);
 	if (!event_dir)
 		return -EINVAL;
 
-	while (!ret && (evt_ent = readdir(event_dir))) {
+	while ((evt_ent = readdir(event_dir))) {
 		char path[PATH_MAX];
 		char *name = evt_ent->d_name;
 		FILE *file;
@@ -305,17 +304,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
 
 		snprintf(path, PATH_MAX, "%s/%s", dir, name);
 
-		ret = -EINVAL;
 		file = fopen(path, "r");
-		if (!file)
-			break;
+		if (!file) {
+			pr_debug("Cannot open %s\n", path);
+			continue;
+		}
 
-		ret = perf_pmu__new_alias(head, dir, name, file);
+		if (perf_pmu__new_alias(head, dir, name, file) < 0)
+			pr_debug("Cannot set up %s\n", name);
 		fclose(file);
 	}
 
 	closedir(event_dir);
-	return ret;
+	return 0;
 }
 
 /*

From 6be7771fe7242d7d4399545e0c9e2f6f3cd76725 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 19 Feb 2016 11:43:52 +0000
Subject: [PATCH 119/797] perf tools: Fix checking asprintf return value

commit 26dee028d365fbc0e3326606a8520260b4462381 upstream.

According to the man pages, asprintf returns -1 on failure. This patch
fixes two incorrect return value checks.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Cody P Schafer <dev@codyps.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kirill Smelkov <kirr@nexedi.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Fixes: ffeb883e5662 ("perf tools: Show proper error message for wrong terms of hw/sw events")
Link: http://lkml.kernel.org/r/1455882283-79592-5-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/parse-events.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b48e87693aa5..a35db828bd0d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2101,11 +2101,11 @@ char *parse_events_formats_error_string(char *additional_terms)
 
 	/* valid terms */
 	if (additional_terms) {
-		if (!asprintf(&str, "valid terms: %s,%s",
-			      additional_terms, static_terms))
+		if (asprintf(&str, "valid terms: %s,%s",
+			     additional_terms, static_terms) < 0)
 			goto fail;
 	} else {
-		if (!asprintf(&str, "valid terms: %s", static_terms))
+		if (asprintf(&str, "valid terms: %s", static_terms) < 0)
 			goto fail;
 	}
 	return str;

From 64bf6d9705aa8724a17a137e76dc6f5b58309026 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 27 Feb 2016 21:21:12 +0100
Subject: [PATCH 120/797] perf tools: Fix python extension build

commit 67d5268908283c187e0a460048a423256c2fb288 upstream.

The util/python-ext-sources file contains source files required to build
the python extension relative to $(srctree)/tools/perf,

Such a file path $(FILE).c is handed over to the python extension build
system, which builds the final object in the
$(PYTHON_EXTBUILD)/tmp/$(FILE).o path.

After the build is done all files from $(PYTHON_EXTBUILD)lib/ are
carried as the result binaries.

The above scheme fails when we add source files relative to ../lib,
which we do for:

  ../lib/bitmap.c
  ../lib/find_bit.c
  ../lib/hweight.c
  ../lib/rbtree.c

All above objects will be built like:

  $(PYTHON_EXTBUILD)/tmp/../lib/bitmap.c
  $(PYTHON_EXTBUILD)/tmp/../lib/find_bit.c
  $(PYTHON_EXTBUILD)/tmp/../lib/hweight.c
  $(PYTHON_EXTBUILD)/tmp/../lib/rbtree.c

which accidentally happens to be the final library path:

  $(PYTHON_EXTBUILD)/lib/

Change setup.py to pass full paths of source files to the Extension
build class and thus keep all built objects under the
$(PYTHON_EXTBUILD)tmp directory.

Reported-by: Jeff Bastian <jbastian@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Josh Boyer <jwboyer@fedoraproject.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20160227201350.GB28494@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/setup.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 1833103768cb..c8680984d2d6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI')
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
 		  sources = ext_sources,
 		  include_dirs = ['util/include'],

From d75936f3f968b98243b9380b0c05d90b5292245d Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 18 Mar 2016 10:03:24 +0800
Subject: [PATCH 121/797] Thermal: Ignore invalid trip points

commit 81ad4276b505e987dd8ebbdf63605f92cd172b52 upstream.

In some cases, platform thermal driver may report invalid trip points,
thermal core should not take any action for these trip points.

This fixed a regression that bogus trip point starts to screw up thermal
control on some Lenovo laptops, after
commit bb431ba26c5cd0a17c941ca6c3a195a3a6d5d461
Author: Zhang Rui <rui.zhang@intel.com>
Date:   Fri Oct 30 16:31:47 2015 +0800

    Thermal: initialize thermal zone device correctly

    After thermal zone device registered, as we have not read any
    temperature before, thus tz->temperature should not be 0,
    which actually means 0C, and thermal trend is not available.
    In this case, we need specially handling for the first
    thermal_zone_device_update().

    Both thermal core framework and step_wise governor is
    enhanced to handle this. And since the step_wise governor
    is the only one that uses trends, so it's the only thermal
    governor that needs to be updated.

    Tested-by: Manuel Krause <manuelkrause@netscape.net>
    Tested-by: szegad <szegadlo@poczta.onet.pl>
    Tested-by: prash <prash.n.rao@gmail.com>
    Tested-by: amish <ammdispose-arch@yahoo.com>
    Tested-by: Matthias <morpheusxyz123@yahoo.de>
    Reviewed-by: Javi Merino <javi.merino@arm.com>
    Signed-off-by: Zhang Rui <rui.zhang@intel.com>
    Signed-off-by: Chen Yu <yu.c.chen@intel.com>

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1317190
Link: https://bugzilla.kernel.org/show_bug.cgi?id=114551
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thermal/thermal_core.c | 13 ++++++++++++-
 include/linux/thermal.h        |  2 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index ba08b5521382..3d5f8f432b5b 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -454,6 +454,10 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
 {
 	enum thermal_trip_type type;
 
+	/* Ignore disabled trip points */
+	if (test_bit(trip, &tz->trips_disabled))
+		return;
+
 	tz->ops->get_trip_type(tz, trip, &type);
 
 	if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
@@ -1796,6 +1800,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 {
 	struct thermal_zone_device *tz;
 	enum thermal_trip_type trip_type;
+	int trip_temp;
 	int result;
 	int count;
 	int passive = 0;
@@ -1867,9 +1872,15 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 		goto unregister;
 
 	for (count = 0; count < trips; count++) {
-		tz->ops->get_trip_type(tz, count, &trip_type);
+		if (tz->ops->get_trip_type(tz, count, &trip_type))
+			set_bit(count, &tz->trips_disabled);
 		if (trip_type == THERMAL_TRIP_PASSIVE)
 			passive = 1;
+		if (tz->ops->get_trip_temp(tz, count, &trip_temp))
+			set_bit(count, &tz->trips_disabled);
+		/* Check for bogus trip points */
+		if (trip_temp == 0)
+			set_bit(count, &tz->trips_disabled);
 	}
 
 	if (!passive) {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e13a1ace50e9..4a849f19e6c9 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -156,6 +156,7 @@ struct thermal_attr {
  * @trip_hyst_attrs:	attributes for trip points for sysfs: trip hysteresis
  * @devdata:	private pointer for device private data
  * @trips:	number of trip points the thermal zone supports
+ * @trips_disabled:	bitmap for disabled trips
  * @passive_delay:	number of milliseconds to wait between polls when
  *			performing passive cooling.
  * @polling_delay:	number of milliseconds to wait between polls when
@@ -191,6 +192,7 @@ struct thermal_zone_device {
 	struct thermal_attr *trip_hyst_attrs;
 	void *devdata;
 	int trips;
+	unsigned long trips_disabled;	/* bitmap for disabled trips */
 	int passive_delay;
 	int polling_delay;
 	int temperature;

From af080e5802224176001a987c3ab77ba7490ef49e Mon Sep 17 00:00:00 2001
From: Chris Friesen <cbf123@mail.usask.ca>
Date: Sat, 5 Mar 2016 23:18:48 -0600
Subject: [PATCH 122/797] sched/cputime: Fix steal_account_process_tick() to
 always return jiffies

commit f9c904b7613b8b4c85b10cd6b33ad41b2843fa9d upstream.

The callers of steal_account_process_tick() expect it to return
whether a jiffy should be considered stolen or not.

Currently the return value of steal_account_process_tick() is in
units of cputime, which vary between either jiffies or nsecs
depending on CONFIG_VIRT_CPU_ACCOUNTING_GEN.

If cputime has nsecs granularity and there is a tiny amount of
stolen time (a few nsecs, say) then we will consider the entire
tick stolen and will not account the tick on user/system/idle,
causing /proc/stats to show invalid data.

The fix is to change steal_account_process_tick() to accumulate
the stolen time and only account it once it's worth a jiffy.

(Thanks to Frederic Weisbecker for suggestions to fix a bug in my
first version of the patch.)

Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/56DBBDB8.40305@mail.usask.ca
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/cputime.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 05de80b48586..f74ea89e77a8 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -259,21 +259,21 @@ static __always_inline bool steal_account_process_tick(void)
 #ifdef CONFIG_PARAVIRT
 	if (static_key_false(&paravirt_steal_enabled)) {
 		u64 steal;
-		cputime_t steal_ct;
+		unsigned long steal_jiffies;
 
 		steal = paravirt_steal_clock(smp_processor_id());
 		steal -= this_rq()->prev_steal_time;
 
 		/*
-		 * cputime_t may be less precise than nsecs (eg: if it's
-		 * based on jiffies). Lets cast the result to cputime
+		 * steal is in nsecs but our caller is expecting steal
+		 * time in jiffies. Lets cast the result to jiffies
 		 * granularity and account the rest on the next rounds.
 		 */
-		steal_ct = nsecs_to_cputime(steal);
-		this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);
+		steal_jiffies = nsecs_to_jiffies(steal);
+		this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
 
-		account_steal_time(steal_ct);
-		return steal_ct;
+		account_steal_time(jiffies_to_cputime(steal_jiffies));
+		return steal_jiffies;
 	}
 #endif
 	return false;

From 10595c57f4682b096d6ac6dd2212ac889f08d9f4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 29 Feb 2016 09:19:24 +0100
Subject: [PATCH 123/797] sched/preempt, sh: kmap_coherent relies on disabled
 preemption

commit b15d53d009558d14c4f394a6d1fa2039c7f45c43 upstream.

kmap_coherent needs disabled preemption to not schedule in the critical
section, just like kmap_coherent on mips and kmap_atomic in general.

Fixes: 8222dbe21e79 "sched/preempt, mm/fault: Decouple preemption from the page fault logic"
Reported-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Tested-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Rich Felker <dalias@libc.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sh/mm/kmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index ec29e14ec5a8..bf25d7c79a2d 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -36,6 +36,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 
 	BUG_ON(!test_bit(PG_dcache_clean, &page->flags));
 
+	preempt_disable();
 	pagefault_disable();
 
 	idx = FIX_CMAP_END -
@@ -64,4 +65,5 @@ void kunmap_coherent(void *kvaddr)
 	}
 
 	pagefault_enable();
+	preempt_enable();
 }

From dff87fa52ddf26df67526d303d08226e7168560b Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 9 Mar 2016 16:40:48 -0800
Subject: [PATCH 124/797] EDAC/sb_edac: Fix computation of channel address

commit eb1af3b71f9d83e45f2fd2fd649356e98e1c582c upstream.

Large memory Haswell-EX systems with multiple DIMMs per channel were
sometimes reporting the wrong DIMM.

Found three problems:

 1) Debug printouts for socket and channel interleave were not interpreting
    the register fields correctly. The socket interleave field is a 2^X
    value (0=1, 1=2, 2=4, 3=8). The channel interleave is X+1 (0=1, 1=2,
    2=3, 3=4).

 2) Actual use of the socket interleave value didn't interpret as 2^X

 3) Conversion of address to channel address was complicated, and wrong.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <arozansk@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/sb_edac.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 429309c62699..cbee3179ec08 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1117,8 +1117,8 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 		edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
 			 n_tads, gb, (mb*1000)/1024,
 			 ((u64)tmp_mb) << 20L,
-			 (u32)TAD_SOCK(reg),
-			 (u32)TAD_CH(reg),
+			 (u32)(1 << TAD_SOCK(reg)),
+			 (u32)TAD_CH(reg) + 1,
 			 (u32)TAD_TGT0(reg),
 			 (u32)TAD_TGT1(reg),
 			 (u32)TAD_TGT2(reg),
@@ -1396,7 +1396,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	}
 
 	ch_way = TAD_CH(reg) + 1;
-	sck_way = TAD_SOCK(reg) + 1;
+	sck_way = 1 << TAD_SOCK(reg);
 
 	if (ch_way == 3)
 		idx = addr >> 6;
@@ -1453,7 +1453,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		 n_tads,
 		 addr,
 		 limit,
-		 (u32)TAD_SOCK(reg),
+		 sck_way,
 		 ch_way,
 		 offset,
 		 idx,
@@ -1468,18 +1468,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 			offset, addr);
 		return -EINVAL;
 	}
-	addr -= offset;
-	/* Store the low bits [0:6] of the addr */
-	ch_addr = addr & 0x7f;
-	/* Remove socket wayness and remove 6 bits */
-	addr >>= 6;
-	addr = div_u64(addr, sck_xch);
-#if 0
-	/* Divide by channel way */
-	addr = addr / ch_way;
-#endif
-	/* Recover the last 6 bits */
-	ch_addr |= addr << 6;
+
+	ch_addr = addr - offset;
+	ch_addr >>= (6 + shiftup);
+	ch_addr /= ch_way * sck_way;
+	ch_addr <<= (6 + shiftup);
+	ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
 
 	/*
 	 * Step 3) Decode rank

From 608377369dcebfa0dc9506a4f17d008bc93bb5b9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 20 Jan 2016 12:54:51 +0300
Subject: [PATCH 125/797] EDAC, amd64_edac: Shift wrapping issue in
 f1x_get_norm_dct_addr()

commit 6f3508f61c814ee852c199988a62bd954c50dfc1 upstream.

dct_sel_base_off is declared as a u64 but we're only using the lower 32
bits because of a shift wrapping bug. This can possibly truncate the
upper 16 bits of DctSelBaseOffset[47:26], causing us to misdecode the CS
row.

Fixes: c8e518d5673d ('amd64_edac: Sanitize f10_get_base_addr_offset')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20160120095451.GB19898@mwanda
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/amd64_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9eee13ef83a5..d87a47547ba5 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1452,7 +1452,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 	u64 chan_off;
 	u64 dram_base		= get_dram_base(pvt, range);
 	u64 hole_off		= f10_dhar_offset(pvt);
-	u64 dct_sel_base_off	= (pvt->dct_sel_hi & 0xFFFFFC00) << 16;
+	u64 dct_sel_base_off	= (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16;
 
 	if (hi_rng) {
 		/*

From 1a4d9389206b787f620966dd0442ac0cd8df5525 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 10 Mar 2016 09:52:55 +0100
Subject: [PATCH 126/797] s390: fix floating pointer register corruption
 (again)

commit e370e4769463a65dcf8806fa26d2874e0542ac41 upstream.

There is a tricky interaction between the machine check handler
and the critical sections of load_fpu_regs and save_fpu_regs
functions. If the machine check interrupts one of the two
functions the critical section cleanup will complete the function
before the machine check handler s390_do_machine_check is called.
Trouble is that the machine check handler needs to validate the
floating point registers *before* and not *after* the completion
of load_fpu_regs/save_fpu_regs.

The simplest solution is to rewind the PSW to the start of the
load_fpu_regs/save_fpu_regs and retry the function after the
return from the machine check handler.

Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/entry.S | 106 +--------------------------------------
 1 file changed, 2 insertions(+), 104 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 857b6526d298..424e6809ad07 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1197,114 +1197,12 @@ cleanup_critical:
 	.quad	.Lpsw_idle_lpsw
 
 .Lcleanup_save_fpu_regs:
-	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
-	bor	%r14
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
-	jhe	5f
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_fp)
-	jhe	4f
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
-	jhe	3f
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
-	jhe	2f
-	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
-	jhe	1f
-	lg	%r2,__LC_CURRENT
-	aghi	%r2,__TASK_thread
-0:	# Store floating-point controls
-	stfpc	__THREAD_FPU_fpc(%r2)
-1:	# Load register save area and check if VX is active
-	lg	%r3,__THREAD_FPU_regs(%r2)
-	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
-	jz	4f			  # no VX -> store FP regs
-2:	# Store vector registers (V0-V15)
-	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
-3:	# Store vector registers (V16-V31)
-	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
-	j	5f			  # -> done, set CIF_FPU flag
-4:	# Store floating-point registers
-	std	0,0(%r3)
-	std	1,8(%r3)
-	std	2,16(%r3)
-	std	3,24(%r3)
-	std	4,32(%r3)
-	std	5,40(%r3)
-	std	6,48(%r3)
-	std	7,56(%r3)
-	std	8,64(%r3)
-	std	9,72(%r3)
-	std	10,80(%r3)
-	std	11,88(%r3)
-	std	12,96(%r3)
-	std	13,104(%r3)
-	std	14,112(%r3)
-	std	15,120(%r3)
-5:	# Set CIF_FPU flag
-	oi	__LC_CPU_FLAGS+7,_CIF_FPU
-	lg	%r9,48(%r11)		# return from save_fpu_regs
+	larl	%r9,save_fpu_regs
 	br	%r14
-.Lcleanup_save_fpu_fpc_end:
-	.quad	.Lsave_fpu_regs_fpc_end
-.Lcleanup_save_fpu_regs_vx_low:
-	.quad	.Lsave_fpu_regs_vx_low
-.Lcleanup_save_fpu_regs_vx_high:
-	.quad	.Lsave_fpu_regs_vx_high
-.Lcleanup_save_fpu_regs_fp:
-	.quad	.Lsave_fpu_regs_fp
-.Lcleanup_save_fpu_regs_done:
-	.quad	.Lsave_fpu_regs_done
 
 .Lcleanup_load_fpu_regs:
-	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
-	bnor	%r14
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
-	jhe	1f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
-	jhe	2f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
-	jhe	3f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
-	jhe	4f
-	lg	%r4,__LC_CURRENT
-	aghi	%r4,__TASK_thread
-	lfpc	__THREAD_FPU_fpc(%r4)
-	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
-	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	2f				# -> no VX, load FP regs
-4:	# Load V0 ..V15 registers
-	VLM	%v0,%v15,0,%r4
-3:	# Load V16..V31 registers
-	VLM	%v16,%v31,256,%r4
-	j	1f
-2:	# Load floating-point registers
-	ld	0,0(%r4)
-	ld	1,8(%r4)
-	ld	2,16(%r4)
-	ld	3,24(%r4)
-	ld	4,32(%r4)
-	ld	5,40(%r4)
-	ld	6,48(%r4)
-	ld	7,56(%r4)
-	ld	8,64(%r4)
-	ld	9,72(%r4)
-	ld	10,80(%r4)
-	ld	11,88(%r4)
-	ld	12,96(%r4)
-	ld	13,104(%r4)
-	ld	14,112(%r4)
-	ld	15,120(%r4)
-1:	# Clear CIF_FPU bit
-	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
-	lg	%r9,48(%r11)		# return from load_fpu_regs
+	larl	%r9,load_fpu_regs
 	br	%r14
-.Lcleanup_load_fpu_regs_vx:
-	.quad	.Lload_fpu_regs_vx
-.Lcleanup_load_fpu_regs_vx_high:
-	.quad	.Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp:
-	.quad	.Lload_fpu_regs_fp
-.Lcleanup_load_fpu_regs_done:
-	.quad	.Lload_fpu_regs_done
 
 /*
  * Integer constants

From c024dcd3df1a0180993d4ce94b87e8ec271c6c2e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 10 Mar 2016 10:32:21 +0100
Subject: [PATCH 127/797] s390/cpumf: add missing lpp magic initialization

commit 8f100bb1ff27873dd71f636da670e503b9ade3c6 upstream.

Add the missing lpp magic initialization for cpu 0. Without this all
samples on cpu 0 do not have the most significant bit set in the
program parameter field, which we use to distinguish between guest and
host samples if the pid is also 0.

We did initialize the lpp magic in the absolute zero lowcore but
forgot that when switching to the allocated lowcore on cpu 0 only.

Reported-by: Shu Juan Zhang <zhshuj@cn.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: e22cf8ca6f75 ("s390/cpumf: rework program parameter setting to detect guest samples")
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c837bcacf218..1f581eb61bc2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -329,6 +329,7 @@ static void __init setup_lowcore(void)
 		+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
 	lc->thread_info = (unsigned long) &init_thread_union;
+	lc->lpp = LPP_MAGIC;
 	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,

From c1948606af0861bd181cb85ed1797ef02c20bbec Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 14 Mar 2016 15:47:23 +0100
Subject: [PATCH 128/797] s390/pci: enforce fmb page boundary rule

commit 80c544ded25ac14d7cc3e555abb8ed2c2da99b84 upstream.

The function measurement block must not cross a page boundary. Ensure
that by raising the alignment requirement to the smallest power of 2
larger than the size of the fmb.

Fixes: d0b088531 ("s390/pci: performance statistics and debug infrastructure")
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/pci.h | 2 +-
 arch/s390/pci/pci.c         | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c873e682b67f..2b2ced9dc00a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -45,7 +45,7 @@ struct zpci_fmb {
 	u64 rpcit_ops;
 	u64 dma_rbytes;
 	u64 dma_wbytes;
-} __packed __aligned(16);
+} __packed __aligned(64);
 
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7ef12a3ace3a..19442395f413 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -871,8 +871,11 @@ static inline int barsize(u8 size)
 
 static int zpci_mem_init(void)
 {
+	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+		     __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
-				16, 0, NULL);
+					   __alignof__(struct zpci_fmb), 0, NULL);
 	if (!zdev_fmb_cache)
 		goto error_zdev;
 

From 88c9954c5c898dfe2e581cba34417ab5abccdd0d Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Mon, 29 Feb 2016 17:30:08 -0800
Subject: [PATCH 129/797] pinctrl-bcm2835: Fix cut-and-paste error in "pull"
 parsing

commit 2c7e3306d23864d49f686f22e56e180ff0fffb7f upstream.

The DT bindings for pinctrl-bcm2835 allow both the function and pull
to contain either one entry or one per pin. However, an error in the
DT parsing can cause failures if the number of pulls differs from the
number of functions.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
Reviewed-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/bcm/pinctrl-bcm2835.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index 2e6ca69635aa..17dd8fe12b54 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -779,7 +779,7 @@ static int bcm2835_pctl_dt_node_to_map(struct pinctrl_dev *pctldev,
 		}
 		if (num_pulls) {
 			err = of_property_read_u32_index(np, "brcm,pull",
-					(num_funcs > 1) ? i : 0, &pull);
+					(num_pulls > 1) ? i : 0, &pull);
 			if (err)
 				goto out;
 			err = bcm2835_pctl_dt_node_to_map_pull(pc, np, pin,

From 8cbac3c4f74d92bf04645a613e061ab4f9baa866 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 25 Feb 2016 14:35:57 -0600
Subject: [PATCH 130/797] PCI: Disable IO/MEM decoding for devices with
 non-compliant BARs

commit b84106b4e2290c081cdab521fa832596cdfea246 upstream.

The PCI config header (first 64 bytes of each device's config space) is
defined by the PCI spec so generic software can identify the device and
manage its usage of I/O, memory, and IRQ resources.

Some non-spec-compliant devices put registers other than BARs where the
BARs should be.  When the PCI core sizes these "BARs", the reads and writes
it does may have unwanted side effects, and the "BAR" may appear to
describe non-sensical address space.

Add a flag bit to mark non-compliant devices so we don't touch their BARs.
Turn off IO/MEM decoding to prevent the devices from consuming address
space, since we can't read the BARs to find out what that address space
would be.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/probe.c | 14 ++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index edb1984201e9..7aafb5fb9336 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -179,6 +179,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 	u16 orig_cmd;
 	struct pci_bus_region region, inverted_region;
 
+	if (dev->non_compliant_bars)
+		return 0;
+
 	mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
 
 	/* No printks while decoding is disabled! */
@@ -1174,6 +1177,7 @@ void pci_msi_setup_pci_dev(struct pci_dev *dev)
 int pci_setup_device(struct pci_dev *dev)
 {
 	u32 class;
+	u16 cmd;
 	u8 hdr_type;
 	int pos = 0;
 	struct pci_bus_region region;
@@ -1219,6 +1223,16 @@ int pci_setup_device(struct pci_dev *dev)
 	/* device class may be changed after fixup */
 	class = dev->class >> 8;
 
+	if (dev->non_compliant_bars) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+			dev_info(&dev->dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
+			cmd &= ~PCI_COMMAND_IO;
+			cmd &= ~PCI_COMMAND_MEMORY;
+			pci_write_config_word(dev, PCI_COMMAND, cmd);
+		}
+	}
+
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
 		if (class == PCI_CLASS_BRIDGE_PCI)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6ae25aae88fd..4e554bfff129 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -359,6 +359,7 @@ struct pci_dev {
 	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
 	unsigned int	irq_managed:1;
 	unsigned int	has_secondary_link:1;
+	unsigned int	non_compliant_bars:1;	/* broken BARs; ignore them */
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 

From 2221620b0bce6df54e19eaf6065c368075e499e6 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 21 Mar 2016 11:12:55 +0000
Subject: [PATCH 131/797] PCI: ACPI: IA64: fix IO port generic range check

commit 4a2e7aab4ffce1e0e79b303dc2f9a03aa9f3a332 upstream.

The [0 - 64k] ACPI PCI IO port resource boundary check in:

acpi_dev_ioresource_flags()

is currently applied blindly in the ACPI resource parsing to all
architectures, but only x86 suffers from that IO space limitation.

On arches (ie IA64 and ARM64) where IO space is memory mapped,
the PCI root bridges IO resource windows are firstly initialized from
the _CRS (in acpi_decode_space()) and contain the CPU physical address
at which a root bridge decodes IO space in the CPU physical address
space with the offset value representing the offset required to translate
the PCI bus address into the CPU physical address.

The IO resource windows are then parsed and updated in arch code
before creating and enumerating PCI buses (eg IA64 add_io_space())
to map in an arch specific way the obtained CPU physical address range
to a slice of virtual address space reserved to map PCI IO space,
ending up with PCI bridges resource windows containing IO
resources like the following on a working IA64 configuration:

PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [io  0x1000000-0x100ffff window] (bus
address [0x0000-0xffff])
pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000fffff window]
pci_bus 0000:00: root bus resource [mem 0x80000000-0x8fffffff window]
pci_bus 0000:00: root bus resource [mem 0x80004000000-0x800ffffffff window]
pci_bus 0000:00: root bus resource [bus 00]

This implies that the [0 - 64K] check in acpi_dev_ioresource_flags()
leaves platforms with memory mapped IO space (ie IA64) broken (ie kernel
can't claim IO resources since the host bridge IO resource is disabled
and discarded by ACPI core code, see log on IA64 with missing root bridge
IO resource, silently filtered by current [0 - 64k] check in
acpi_dev_ioresource_flags()):

PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000fffff window]
pci_bus 0000:00: root bus resource [mem 0x80000000-0x8fffffff window]
pci_bus 0000:00: root bus resource [mem 0x80004000000-0x800ffffffff window]
pci_bus 0000:00: root bus resource [bus 00]

[...]

pci 0000:00:03.0: [1002:515e] type 00 class 0x030000
pci 0000:00:03.0: reg 0x10: [mem 0x80000000-0x87ffffff pref]
pci 0000:00:03.0: reg 0x14: [io  0x1000-0x10ff]
pci 0000:00:03.0: reg 0x18: [mem 0x88020000-0x8802ffff]
pci 0000:00:03.0: reg 0x30: [mem 0x88000000-0x8801ffff pref]
pci 0000:00:03.0: supports D1 D2
pci 0000:00:03.0: can't claim BAR 1 [io  0x1000-0x10ff]: no compatible
bridge window

For this reason, the IO port resources boundaries check in generic ACPI
parsing code should be guarded with a CONFIG_X86 guard so that more arches
(ie ARM64) can benefit from the generic ACPI resources parsing interface
without incurring unexpected resource filtering, fixing at the same
time the current breakage on IA64.

This patch factors out IO ports boundary [0 - 64k] check in generic ACPI
code and makes the IO space check X86 specific to make sure that IO
space resources are usable on other arches too.

Fixes: 3772aea7d6f3 (ia64/PCI/ACPI: Use common ACPI resource parsing interface for host bridge)
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/resource.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index cdc5c2599beb..627f8fbb5e9a 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -26,8 +26,20 @@
 
 #ifdef CONFIG_X86
 #define valid_IRQ(i) (((i) != 0) && ((i) != 2))
+static inline bool acpi_iospace_resource_valid(struct resource *res)
+{
+	/* On X86 IO space is limited to the [0 - 64K] IO port range */
+	return res->end < 0x10003;
+}
 #else
 #define valid_IRQ(i) (true)
+/*
+ * ACPI IO descriptors on arches other than X86 contain MMIO CPU physical
+ * addresses mapping IO space in CPU physical address space, IO space
+ * resources can be placed anywhere in the 64-bit physical address space.
+ */
+static inline bool
+acpi_iospace_resource_valid(struct resource *res) { return true; }
 #endif
 
 static bool acpi_dev_resource_len_valid(u64 start, u64 end, u64 len, bool io)
@@ -126,7 +138,7 @@ static void acpi_dev_ioresource_flags(struct resource *res, u64 len,
 	if (!acpi_dev_resource_len_valid(res->start, res->end, len, true))
 		res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET;
 
-	if (res->end >= 0x10003)
+	if (!acpi_iospace_resource_valid(res))
 		res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET;
 
 	if (io_decode == ACPI_DECODE_16)

From dc1441612fdb4ca221e3a4aa32e39e74d020e386 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 14 Mar 2016 09:40:46 +0100
Subject: [PATCH 132/797] x86/irq: Cure live lock in fixup_irqs()

commit 551adc60573cb68e3d55cacca9ba1b7437313df7 upstream.

Harry reported, that he's able to trigger a system freeze with cpu hot
unplug. The freeze turned out to be a live lock caused by recent changes in
irq_force_complete_move().

When fixup_irqs() and from there irq_force_complete_move() is called on the
dying cpu, then all other cpus are in stop machine and wait for the dying cpu
to complete the teardown. If there is a move of an interrupt pending then
irq_force_complete_move() sends the cleanup IPI to the cpus in the old_domain
mask and waits for them to clear the mask. That's obviously impossible as
those cpus are firmly stuck in stop machine with interrupts disabled.

I should have known that, but I completely overlooked it being concentrated on
the locking issues around the vectors. And the existence of the call to
__irq_complete_move() in the code, which actually sends the cleanup IPI made
it reasonable to wait for that cleanup to complete. That call was bogus even
before the recent changes as it was just a pointless distraction.

We have to look at two cases:

1) The move_in_progress flag of the interrupt is set

   This means the ioapic has been updated with the new vector, but it has not
   fired yet. In theory there is a race:

   set_ioapic(new_vector) <-- Interrupt is raised before update is effective,
   			      i.e. it's raised on the old vector.

   So if the target cpu cannot handle that interrupt before the old vector is
   cleaned up, we get a spurious interrupt and in the worst case the ioapic
   irq line becomes stale, but my experiments so far have only resulted in
   spurious interrupts.

   But in case of cpu hotplug this should be a non issue because if the
   affinity update happens right before all cpus rendezvous in stop machine,
   there is no way that the interrupt can be blocked on the target cpu because
   all cpus loop first with interrupts enabled in stop machine, so the old
   vector is not yet cleaned up when the interrupt fires.

   So the only way to run into this issue is if the delivery of the interrupt
   on the apic/system bus would be delayed beyond the point where the target
   cpu disables interrupts in stop machine. I doubt that it can happen, but at
   least there is a theoretical chance. Virtualization might be able to
   expose this, but AFAICT the IOAPIC emulation is not as stupid as the real
   hardware.

   I've spent quite some time over the weekend to enforce that situation,
   though I was not able to trigger the delayed case.

2) The move_in_progress flag is not set and the old_domain cpu mask is not
   empty.

   That means, that an interrupt was delivered after the change and the
   cleanup IPI has been sent to the cpus in old_domain, but not all CPUs have
   responded to it yet.

In both cases we can assume that the next interrupt will arrive on the new
vector, so we can cleanup the old vectors on the cpus in the old_domain cpu
mask.

Fixes: 98229aa36caa "x86/irq: Plug vector cleanup race"
Reported-by: Harry Junior <harryjr@outlook.fr>
Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ben Hutchings <ben@decadent.org.uk>
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1603140931430.3657@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/hw_irq.h |  1 +
 arch/x86/kernel/apic/vector.c | 92 +++++++++++++++++++++++++++--------
 2 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1e3408e88604..59caa55fb9b5 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -136,6 +136,7 @@ struct irq_alloc_info {
 struct irq_cfg {
 	unsigned int		dest_apicid;
 	u8			vector;
+	u8			old_vector;
 };
 
 extern struct irq_cfg *irq_cfg(unsigned int irq);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a35f6b5473f4..7af2505f20c2 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -211,6 +211,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
 	d->move_in_progress = !cpumask_empty(d->old_domain);
+	d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0;
 	d->cfg.vector = vector;
 	cpumask_copy(d->domain, vector_cpumask);
 success:
@@ -653,46 +654,97 @@ void irq_complete_move(struct irq_cfg *cfg)
 }
 
 /*
- * Called with @desc->lock held and interrupts disabled.
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
 void irq_force_complete_move(struct irq_desc *desc)
 {
 	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
 	struct apic_chip_data *data = apic_chip_data(irqdata);
 	struct irq_cfg *cfg = data ? &data->cfg : NULL;
+	unsigned int cpu;
 
 	if (!cfg)
 		return;
 
-	__irq_complete_move(cfg, cfg->vector);
-
 	/*
 	 * This is tricky. If the cleanup of @data->old_domain has not been
 	 * done yet, then the following setaffinity call will fail with
 	 * -EBUSY. This can leave the interrupt in a stale state.
 	 *
-	 * The cleanup cannot make progress because we hold @desc->lock. So in
-	 * case @data->old_domain is not yet cleaned up, we need to drop the
-	 * lock and acquire it again. @desc cannot go away, because the
-	 * hotplug code holds the sparse irq lock.
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
 	 */
 	raw_spin_lock(&vector_lock);
-	/* Clean out all offline cpus (including ourself) first. */
+	/*
+	 * Clean out all offline cpus (including the outgoing one) from the
+	 * old_domain mask.
+	 */
 	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
-	while (!cpumask_empty(data->old_domain)) {
+
+	/*
+	 * If move_in_progress is cleared and the old_domain mask is empty,
+	 * then there is nothing to cleanup. fixup_irqs() will take care of
+	 * the stale vectors on the outgoing cpu.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain)) {
 		raw_spin_unlock(&vector_lock);
-		raw_spin_unlock(&desc->lock);
-		cpu_relax();
-		raw_spin_lock(&desc->lock);
-		/*
-		 * Reevaluate apic_chip_data. It might have been cleared after
-		 * we dropped @desc->lock.
-		 */
-		data = apic_chip_data(irqdata);
-		if (!data)
-			return;
-		raw_spin_lock(&vector_lock);
+		return;
 	}
+
+	/*
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (data->move_in_progress) {
+		/*
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *			      is effective, i.e. it's raised on
+		 *			      the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non issue
+		 * because if the affinity update happens right before all
+		 * cpus rendevouz in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loops first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theroretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
+		 */
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqdata->irq, cfg->old_vector);
+	}
+	/*
+	 * If old_domain is not empty, then other cpus still have the irq
+	 * descriptor set in their vector array. Clean it up.
+	 */
+	for_each_cpu(cpu, data->old_domain)
+		per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
+
+	/* Cleanup the left overs of the (half finished) move */
+	cpumask_clear(data->old_domain);
+	data->move_in_progress = 0;
 	raw_spin_unlock(&vector_lock);
 }
 #endif

From 1eeb3225856a914d199f92d2d492142783eb5740 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@codemonkey.org.uk>
Date: Mon, 14 Mar 2016 21:20:54 -0400
Subject: [PATCH 133/797] x86/apic: Fix suspicious RCU usage in
 smp_trace_call_function_interrupt()

commit 7834c10313fb823e538f2772be78edcdeed2e6e3 upstream.

Since 4.4, I've been able to trigger this occasionally:

===============================
[ INFO: suspicious RCU usage. ]
4.5.0-rc7-think+ #3 Not tainted
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20160315012054.GA17765@codemonkey.org.uk
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

-------------------------------
./arch/x86/include/asm/msr-trace.h:47 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 1
RCU used illegally from extended quiescent state!
no locks held by swapper/3/0.

stack backtrace:
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.5.0-rc7-think+ #3
 ffffffff92f821e0 1f3e5c340597d7fc ffff880468e07f10 ffffffff92560c2a
 ffff880462145280 0000000000000001 ffff880468e07f40 ffffffff921376a6
 ffffffff93665ea0 0000cc7c876d28da 0000000000000005 ffffffff9383dd60
Call Trace:
 <IRQ>  [<ffffffff92560c2a>] dump_stack+0x67/0x9d
 [<ffffffff921376a6>] lockdep_rcu_suspicious+0xe6/0x100
 [<ffffffff925ae7a7>] do_trace_write_msr+0x127/0x1a0
 [<ffffffff92061c83>] native_apic_msr_eoi_write+0x23/0x30
 [<ffffffff92054408>] smp_trace_call_function_interrupt+0x38/0x360
 [<ffffffff92d1ca60>] trace_call_function_interrupt+0x90/0xa0
 <EOI>  [<ffffffff92ac5124>] ? cpuidle_enter_state+0x1b4/0x520

Move the entering_irq() call before ack_APIC_irq(), because entering_irq()
tells the RCU subsystems to end the extended quiescent state, so that the
following trace call in ack_APIC_irq() works correctly.

Suggested-by: Andi Kleen <ak@linux.intel.com>
Fixes: 4787c368a9bc "x86/tracing: Add irq_enter/exit() in smp_trace_reschedule_interrupt()"
Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/apic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a30316bf801a..163769d82475 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -638,8 +638,8 @@ static inline void entering_irq(void)
 
 static inline void entering_ack_irq(void)
 {
-	ack_APIC_irq();
 	entering_irq();
+	ack_APIC_irq();
 }
 
 static inline void ipi_entering_ack_irq(void)

From 0f63ab5873ed78838afa4b2f8bfd9d18f806cf40 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 16 Mar 2016 14:14:21 -0700
Subject: [PATCH 134/797] x86/iopl/64: Properly context-switch IOPL on Xen PV

commit b7a584598aea7ca73140cb87b40319944dd3393f upstream.

On Xen PV, regs->flags doesn't reliably reflect IOPL and the
exit-to-userspace code doesn't change IOPL.  We need to context
switch it manually.

I'm doing this without going through paravirt because this is
specific to Xen PV.  After the dust settles, we can merge this with
the 32-bit code, tidy up the iopl syscall implementation, and remove
the set_iopl pvop entirely.

Fixes XSA-171.

Reviewed-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/693c3bd7aeb4d3c27c92c622b7d0f554a458173c.1458162709.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/xen/hypervisor.h |  2 ++
 arch/x86/kernel/process_64.c          | 12 ++++++++++++
 arch/x86/xen/enlighten.c              |  2 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 8b2d4bea9962..39171b3646bb 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,4 +62,6 @@ void xen_arch_register_cpu(int num);
 void xen_arch_unregister_cpu(int num);
 #endif
 
+extern void xen_set_iopl_mask(unsigned mask);
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e835d263a33b..4cbb60fbff3e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -48,6 +48,7 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
+#include <asm/xen/hypervisor.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -411,6 +412,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+#ifdef CONFIG_XEN
+	/*
+	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
+	 * current_pt_regs()->flags may not match the current task's
+	 * intended IOPL.  We need to switch it manually.
+	 */
+	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
+		     prev->iopl != next->iopl))
+		xen_set_iopl_mask(next->iopl);
+#endif
+
 	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
 		/*
 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b7de78bdc09c..beab8c706ac9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -961,7 +961,7 @@ static void xen_load_sp0(struct tss_struct *tss,
 	tss->x86_tss.sp0 = thread->sp0;
 }
 
-static void xen_set_iopl_mask(unsigned mask)
+void xen_set_iopl_mask(unsigned mask)
 {
 	struct physdev_set_iopl set_iopl;
 

From f71e846236048ca5165b4ff5bc6f1745cabb6bd6 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 16 Mar 2016 14:14:22 -0700
Subject: [PATCH 135/797] x86/iopl: Fix iopl capability check on Xen PV

commit c29016cf41fe9fa994a5ecca607cf5f1cd98801e upstream.

iopl(3) is supposed to work if iopl is already 3, even if
unprivileged.  This didn't work right on Xen PV.  Fix it.

Reviewed-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/8ce12013e6e4c0a44a97e316be4a6faff31bd5ea.1458162709.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/ioport.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 37dae792dbbe..589b3193f102 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -96,9 +96,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
 	struct pt_regs *regs = current_pt_regs();
-	unsigned int old = (regs->flags >> 12) & 3;
 	struct thread_struct *t = &current->thread;
 
+	/*
+	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
+	 * and changing them has no effect.
+	 */
+	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+
 	if (level > 3)
 		return -EINVAL;
 	/* Trying to gain more privileges? */
@@ -106,8 +111,9 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-	t->iopl = level << 12;
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
+		(level << X86_EFLAGS_IOPL_BIT);
+	t->iopl = level << X86_EFLAGS_IOPL_BIT;
 	set_iopl_mask(t->iopl);
 
 	return 0;

From 8e8a1a17bcc016c59044b06776fc1ddbcc897bb3 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Fri, 1 Apr 2016 14:31:23 -0700
Subject: [PATCH 136/797] x86/mm: TLB_REMOTE_SEND_IPI should count pages

commit 18c98243ddf05a1827ad2c359c5ac051101e7ff7 upstream.

TLB_REMOTE_SEND_IPI was recently introduced, but it counts bytes instead
of pages.  In addition, it does not report correctly the case in which
flush_tlb_page flushes a page.  Fix it to be consistent with other TLB
counters.

Fixes: 5b74283ab251b9d ("x86, mm: trace when an IPI is about to be sent")
Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/tlb.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8f4cc3dfac32..5fb6adaaa796 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -106,8 +106,6 @@ static void flush_tlb_func(void *info)
 
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
-	if (!f->flush_end)
-		f->flush_end = f->flush_start + PAGE_SIZE;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
@@ -135,12 +133,20 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 				 unsigned long end)
 {
 	struct flush_tlb_info info;
+
+	if (end == 0)
+		end = start + PAGE_SIZE;
 	info.flush_mm = mm;
 	info.flush_start = start;
 	info.flush_end = end;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
-	trace_tlb_flush(TLB_REMOTE_SEND_IPI, end - start);
+	if (end == TLB_FLUSH_ALL)
+		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
+	else
+		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
+				(end - start) >> PAGE_SHIFT);
+
 	if (is_uv_system()) {
 		unsigned int cpu;
 

From f5967a77df538ea60fb83f3957b9d7cfc4646ae2 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dgilbert@interlog.com>
Date: Thu, 3 Mar 2016 00:31:29 -0500
Subject: [PATCH 137/797] sg: fix dxferp in from_to case

commit 5ecee0a3ee8d74b6950cb41e8989b0c2174568d4 upstream.

One of the strange things that the original sg driver did was let the
user provide both a data-out buffer (it followed the sg_header+cdb)
_and_ specify a reply length greater than zero. What happened was that
the user data-out buffer was copied into some kernel buffers and then
the mid level was told a read type operation would take place with the
data from the device overwriting the same kernel buffers. The user would
then read those kernel buffers back into the user space.

From what I can tell, the above action was broken by commit fad7f01e61bf
("sg: set dxferp to NULL for READ with the older SG interface") in 2008
and syzkaller found that out recently.

Make sure that a user space pointer is passed through when data follows
the sg_header structure and command.  Fix the abnormal case when a
non-zero reply_len is also given.

Fixes: fad7f01e61bf737fe8a3740d803f000db57ecac6
Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Ewan Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 5e820674432c..ae7d9bdf409c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -652,7 +652,8 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
 	else
 		hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE;
 	hp->dxfer_len = mxsize;
-	if (hp->dxfer_direction == SG_DXFER_TO_DEV)
+	if ((hp->dxfer_direction == SG_DXFER_TO_DEV) ||
+	    (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV))
 		hp->dxferp = (char __user *)buf + cmd_size;
 	else
 		hp->dxferp = NULL;

From 1624297ccc24486aa9e264f04f59743e5563a6b2 Mon Sep 17 00:00:00 2001
From: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Date: Wed, 3 Feb 2016 15:06:00 -0800
Subject: [PATCH 138/797] aacraid: Fix RRQ overload

commit 3f4ce057d51a9c0ed9b01ba693df685d230ffcae upstream.

The driver utilizes an array of atomic variables to keep track of IO
submissions to each vector. To submit an IO multiple threads iterate
through the array to find a vector which has empty slots to send an
IO. The reading and updating of the variable is not atomic, causing race
conditions when a thread uses a full vector to submit an IO.

Fixed by mapping each FIB to a vector, the submission path then uses
said vector to submit IO thereby removing the possibility of a race
condition. The vector assignment is started from 1 since vector 0 is
reserved for the use of AIF management FIBs. If the number of MSIx
vectors is 1 (MSI or INTx mode) then all the fibs are allocated to
vector 0.

Fixes: 495c0217 "aacraid: MSI-x support"
Signed-off-by: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/aacraid/aacraid.h |  2 ++
 drivers/scsi/aacraid/commsup.c | 28 ++++++++++++++++++++++++++++
 drivers/scsi/aacraid/src.c     | 30 +++++++-----------------------
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 074878b55a0b..d044f3f273be 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -944,6 +944,7 @@ struct fib {
 	 */
 	struct list_head	fiblink;
 	void			*data;
+	u32			vector_no;
 	struct hw_fib		*hw_fib_va;		/* Actual shared object */
 	dma_addr_t		hw_fib_pa;		/* physical address of hw_fib*/
 };
@@ -2113,6 +2114,7 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor)
 int aac_acquire_irq(struct aac_dev *dev);
 void aac_free_irq(struct aac_dev *dev);
 const char *aac_driverinfo(struct Scsi_Host *);
+void aac_fib_vector_assign(struct aac_dev *dev);
 struct fib *aac_fib_alloc(struct aac_dev *dev);
 int aac_fib_setup(struct aac_dev *dev);
 void aac_fib_map_free(struct aac_dev *dev);
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index a1f90fe849c9..e9b4c1119eb9 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -90,6 +90,28 @@ void aac_fib_map_free(struct aac_dev *dev)
 	dev->hw_fib_pa = 0;
 }
 
+void aac_fib_vector_assign(struct aac_dev *dev)
+{
+	u32 i = 0;
+	u32 vector = 1;
+	struct fib *fibptr = NULL;
+
+	for (i = 0, fibptr = &dev->fibs[i];
+		i < (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB);
+		i++, fibptr++) {
+		if ((dev->max_msix == 1) ||
+		  (i > ((dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB - 1)
+			- dev->vector_cap))) {
+			fibptr->vector_no = 0;
+		} else {
+			fibptr->vector_no = vector;
+			vector++;
+			if (vector == dev->max_msix)
+				vector = 1;
+		}
+	}
+}
+
 /**
  *	aac_fib_setup	-	setup the fibs
  *	@dev: Adapter to set up
@@ -151,6 +173,12 @@ int aac_fib_setup(struct aac_dev * dev)
 		hw_fib_pa = hw_fib_pa +
 			dev->max_fib_size + sizeof(struct aac_fib_xporthdr);
 	}
+
+	/*
+	 *Assign vector numbers to fibs
+	 */
+	aac_fib_vector_assign(dev);
+
 	/*
 	 *	Add the fib chain to the free list
 	 */
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 2aa34ea8ceb1..bc0203f3d243 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -156,8 +156,8 @@ static irqreturn_t aac_src_intr_message(int irq, void *dev_id)
 				break;
 			if (dev->msi_enabled && dev->max_msix > 1)
 				atomic_dec(&dev->rrq_outstanding[vector_no]);
-			aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL);
 			dev->host_rrq[index++] = 0;
+			aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL);
 			if (index == (vector_no + 1) * dev->vector_cap)
 				index = vector_no * dev->vector_cap;
 			dev->host_rrq_idx[vector_no] = index;
@@ -452,36 +452,20 @@ static int aac_src_deliver_message(struct fib *fib)
 #endif
 
 	u16 hdr_size = le16_to_cpu(fib->hw_fib_va->header.Size);
+	u16 vector_no;
 
 	atomic_inc(&q->numpending);
 
 	if (dev->msi_enabled && fib->hw_fib_va->header.Command != AifRequest &&
 	    dev->max_msix > 1) {
-		u_int16_t vector_no, first_choice = 0xffff;
-
-		vector_no = dev->fibs_pushed_no % dev->max_msix;
-		do {
-			vector_no += 1;
-			if (vector_no == dev->max_msix)
-				vector_no = 1;
-			if (atomic_read(&dev->rrq_outstanding[vector_no]) <
-			    dev->vector_cap)
-				break;
-			if (0xffff == first_choice)
-				first_choice = vector_no;
-			else if (vector_no == first_choice)
-				break;
-		} while (1);
-		if (vector_no == first_choice)
-			vector_no = 0;
-		atomic_inc(&dev->rrq_outstanding[vector_no]);
-		if (dev->fibs_pushed_no == 0xffffffff)
-			dev->fibs_pushed_no = 0;
-		else
-			dev->fibs_pushed_no++;
+		vector_no = fib->vector_no;
 		fib->hw_fib_va->header.Handle += (vector_no << 16);
+	} else {
+		vector_no = 0;
 	}
 
+	atomic_inc(&dev->rrq_outstanding[vector_no]);
+
 	if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE2) {
 		/* Calculate the amount to the fibsize bits */
 		fibsize = (hdr_size + 127) / 128 - 1;

From f3a3019dfc5411743273b5a53f2d95a845bec736 Mon Sep 17 00:00:00 2001
From: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Date: Wed, 3 Feb 2016 15:06:02 -0800
Subject: [PATCH 139/797] aacraid: Fix memory leak in aac_fib_map_free

commit f88fa79a61726ce9434df9b4aede36961f709f17 upstream.

aac_fib_map_free() calls pci_free_consistent() without checking that
dev->hw_fib_va is not NULL and dev->max_fib_size is not zero. If they are
indeed NULL/0, this will result in a hang as pci_free_consistent() will
attempt to invalidate cache for the entire 64-bit address space
(which would take a very long time).

Fixed by adding a check to make sure that dev->hw_fib_va and
dev->max_fib_size are not NULL and 0 respectively.

Fixes: 9ad5204d6 - "[SCSI]aacraid: incorrect dma mapping mask during blinked recover or user initiated reset"
Signed-off-by: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/aacraid/commsup.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index e9b4c1119eb9..4cbf54928640 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -83,9 +83,12 @@ static int fib_map_alloc(struct aac_dev *dev)
 
 void aac_fib_map_free(struct aac_dev *dev)
 {
-	pci_free_consistent(dev->pdev,
-	  dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB),
-	  dev->hw_fib_va, dev->hw_fib_pa);
+	if (dev->hw_fib_va && dev->max_fib_size) {
+		pci_free_consistent(dev->pdev,
+		(dev->max_fib_size *
+		(dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)),
+		dev->hw_fib_va, dev->hw_fib_pa);
+	}
 	dev->hw_fib_va = NULL;
 	dev->hw_fib_pa = 0;
 }

From e468298bd4f80e9353b2fe1273ad036a4abaf6e6 Mon Sep 17 00:00:00 2001
From: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Date: Wed, 3 Feb 2016 15:06:03 -0800
Subject: [PATCH 140/797] aacraid: Set correct msix count for EEH recovery

commit ecc479e00db8eb110b200afe1effcb3df20ca7ae upstream.

During EEH recovery the number of online CPUs might change, thereby changing
the number of MSIx vectors. Since each fib is allocated to a vector,
changes in the number of vectors cause fibs to be sent through invalid
vectors. In addition, the correct number of MSIx vectors is not updated in
the INIT struct sent to the controller when it is reinitialized.

Fixed by reassigning vectors to fibs based on the updated number of MSIx
vectors and updating the INIT structure before sending to controller.

Fixes: MSI-X vector calculation for suspend/resume
Signed-off-by: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Reviewed-by: Shane Seymour <shane.seymour@hpe.com>
Reviewed-by: Johannes Thumshirn <jthushirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/aacraid/linit.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 3b6e5c67e853..aa6eccb8940b 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1404,8 +1404,18 @@ static int aac_acquire_resources(struct aac_dev *dev)
 
 	aac_adapter_enable_int(dev);
 
-	if (!dev->sync_mode)
+	/*max msix may change  after EEH
+	 * Re-assign vectors to fibs
+	 */
+	aac_fib_vector_assign(dev);
+
+	if (!dev->sync_mode) {
+		/* After EEH recovery or suspend resume, max_msix count
+		 * may change, therfore updating in init as well.
+		 */
 		aac_adapter_start(dev);
+		dev->init->Sa_MSIXVectors = cpu_to_le32(dev->max_msix);
+	}
 	return 0;
 
 error_iounmap:

From 67aa7e6dd927c17103b3c5acb7eb50efb2372dab Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Sat, 5 Mar 2016 17:52:02 -0500
Subject: [PATCH 141/797] sd: Fix discard granularity when LBPRZ=1

commit 6540a65da90c09590897310e31993b1f6e28485a upstream.

Commit 397737223c59 ("sd: Make discard granularity match logical block
size when LBPRZ=1") accidentally set the granularity to one byte instead
of one logical block on devices that provide deterministic zeroes after
UNMAP.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reported-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Ewan Milne <emilne@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Fixes: 397737223c59e89dca7305feb6528caef8fbef84
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index bb669d32ccd0..cc84ea7d09cc 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -648,7 +648,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	 */
 	if (sdkp->lbprz) {
 		q->limits.discard_alignment = 0;
-		q->limits.discard_granularity = 1;
+		q->limits.discard_granularity = logical_block_size;
 	} else {
 		q->limits.discard_alignment = sdkp->unmap_alignment *
 			logical_block_size;

From c1f327046b17e6d9fac9bc61e23bcb1897c2d9b3 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 7 Mar 2016 11:59:44 +0100
Subject: [PATCH 142/797] scsi: storvsc: fix SRB_STATUS_ABORTED handling

commit ff06c5ffbcb4ffa542fb80c897be977956fafecc upstream.

Commit 3209f9d780d1 ("scsi: storvsc: Fix a bug in the handling of SRB
status flags") filtered SRB_STATUS_AUTOSENSE_VALID out, effectively making
the (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID) case dead code. The
logic from this branch (e.g. storvsc_device_scan() call) is still required,
fix the check.

Fixes: 3209f9d780d1 ("scsi: storvsc: Fix a bug in the handling of SRB status flags")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/storvsc_drv.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 3fba42ad9fb8..0f636cc4c809 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -889,8 +889,9 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
 		do_work = true;
 		process_err_fn = storvsc_remove_lun;
 		break;
-	case (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID):
-		if ((asc == 0x2a) && (ascq == 0x9)) {
+	case SRB_STATUS_ABORTED:
+		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
+		    (asc == 0x2a) && (ascq == 0x9)) {
 			do_work = true;
 			process_err_fn = storvsc_device_scan;
 			/*

From e760edfb7ba763e48b870025ab0b5edb4af85089 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Fri, 4 Mar 2016 10:41:49 +0100
Subject: [PATCH 143/797] be2iscsi: set the boot_kset pointer to NULL in case
 of failure

commit 84bd64993f916bcf86270c67686ecf4cea7b8933 upstream.

In beiscsi_setup_boot_info(), the boot_kset pointer should be set to
NULL in case of failure otherwise an invalid pointer dereference may
occur later.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Jitendra Bhivare <jitendra.bhivare@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/be2iscsi/be_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index fe0c5143f8e6..758f76e88704 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -4470,6 +4470,7 @@ static int beiscsi_setup_boot_info(struct beiscsi_hba *phba)
 	scsi_host_put(phba->shost);
 free_kset:
 	iscsi_boot_destroy_kset(phba->boot_kset);
+	phba->boot_kset = NULL;
 	return -ENOMEM;
 }
 

From b9d26f81ae9fa23988a5b503455f300bcef292fb Mon Sep 17 00:00:00 2001
From: Alan <gnomes@lxorguk.ukuu.org.uk>
Date: Mon, 15 Feb 2016 18:53:15 +0000
Subject: [PATCH 144/797] aic7xxx: Fix queue depth handling

commit 5a51a7abca133860a6f4429655a9eda3c4afde32 upstream.

We were setting the queue depth correctly, then setting it back to
two. If you hit this as a bisection point then please send me an email
as it would imply we've been hiding other bugs with this one.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Reviewed-by: Hannes Reinicke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/aic7xxx/aic7xxx_osm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index b846a4683562..fc6a83188c1e 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -1336,6 +1336,7 @@ ahc_platform_set_tags(struct ahc_softc *ahc, struct scsi_device *sdev,
 	case AHC_DEV_Q_TAGGED:
 		scsi_change_queue_depth(sdev,
 				dev->openings + dev->active);
+		break;
 	default:
 		/*
 		 * We allow the OS to queue 2 untagged transactions to

From 83250e67a9d65a662c5fdec6075f3c2dee0e79a0 Mon Sep 17 00:00:00 2001
From: Jerry Hoemann <jerry.hoemann@hpe.com>
Date: Wed, 6 Jan 2016 16:03:41 -0700
Subject: [PATCH 145/797] libnvdimm: Fix security issue with DSM IOCTL.

commit 07accfa9d1a8bac8262f6d24a94a54d2d1f35149 upstream.

Code attempts to prevent certain IOCTL DSM from being called
when device is opened read only.  This security feature can
be trivially overcome by changing the size portion of the
ioctl_command which isn't used.

Check only the _IOC_NR (i.e. the command).

Signed-off-by: Jerry Hoemann <jerry.hoemann@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvdimm/bus.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7e2c43f701bc..496b9b662dc6 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -513,10 +513,10 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	/* fail write commands (when read-only) */
 	if (read_only)
-		switch (ioctl_cmd) {
-		case ND_IOCTL_VENDOR:
-		case ND_IOCTL_SET_CONFIG_DATA:
-		case ND_IOCTL_ARS_START:
+		switch (cmd) {
+		case ND_CMD_VENDOR:
+		case ND_CMD_SET_CONFIG_DATA:
+		case ND_CMD_ARS_START:
 			dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
 					nvdimm ? nvdimm_cmd_name(cmd)
 					: nvdimm_bus_cmd_name(cmd));

From 15c3af026b6e66ca2d9566d862af7e2fd7943a40 Mon Sep 17 00:00:00 2001
From: DingXiang <dingxiang@huawei.com>
Date: Tue, 2 Feb 2016 12:29:18 +0800
Subject: [PATCH 146/797] dm snapshot: disallow the COW and origin devices from
 being identical

commit 4df2bf466a9c9c92f40d27c4aa9120f4e8227bfc upstream.

Otherwise loading a "snapshot" table using the same device for the
origin and COW devices, e.g.:

echo "0 20971520 snapshot 253:3 253:3 P 8" | dmsetup create snap

will trigger:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000098
[ 1958.979934] IP: [<ffffffffa040efba>] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot]
[ 1958.989655] PGD 0
[ 1958.991903] Oops: 0000 [#1] SMP
...
[ 1959.059647] CPU: 9 PID: 3556 Comm: dmsetup Tainted: G          IO    4.5.0-rc5.snitm+ #150
...
[ 1959.083517] task: ffff8800b9660c80 ti: ffff88032a954000 task.ti: ffff88032a954000
[ 1959.091865] RIP: 0010:[<ffffffffa040efba>]  [<ffffffffa040efba>] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot]
[ 1959.104295] RSP: 0018:ffff88032a957b30  EFLAGS: 00010246
[ 1959.110219] RAX: 0000000000000000 RBX: 0000000000000008 RCX: 0000000000000001
[ 1959.118180] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff880329334a00
[ 1959.126141] RBP: ffff88032a957b50 R08: 0000000000000000 R09: 0000000000000001
[ 1959.134102] R10: 000000000000000a R11: f000000000000000 R12: ffff880330884d80
[ 1959.142061] R13: 0000000000000008 R14: ffffc90001c13088 R15: ffff880330884d80
[ 1959.150021] FS:  00007f8926ba3840(0000) GS:ffff880333440000(0000) knlGS:0000000000000000
[ 1959.159047] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1959.165456] CR2: 0000000000000098 CR3: 000000032f48b000 CR4: 00000000000006e0
[ 1959.173415] Stack:
[ 1959.175656]  ffffc90001c13040 ffff880329334a00 ffff880330884ed0 ffff88032a957bdc
[ 1959.183946]  ffff88032a957bb8 ffffffffa040f225 ffff880329334a30 ffff880300000000
[ 1959.192233]  ffffffffa04133e0 ffff880329334b30 0000000830884d58 00000000569c58cf
[ 1959.200521] Call Trace:
[ 1959.203248]  [<ffffffffa040f225>] dm_exception_store_create+0x1d5/0x240 [dm_snapshot]
[ 1959.211986]  [<ffffffffa040d310>] snapshot_ctr+0x140/0x630 [dm_snapshot]
[ 1959.219469]  [<ffffffffa0005c44>] ? dm_split_args+0x64/0x150 [dm_mod]
[ 1959.226656]  [<ffffffffa0005ea7>] dm_table_add_target+0x177/0x440 [dm_mod]
[ 1959.234328]  [<ffffffffa0009203>] table_load+0x143/0x370 [dm_mod]
[ 1959.241129]  [<ffffffffa00090c0>] ? retrieve_status+0x1b0/0x1b0 [dm_mod]
[ 1959.248607]  [<ffffffffa0009e35>] ctl_ioctl+0x255/0x4d0 [dm_mod]
[ 1959.255307]  [<ffffffff813304e2>] ? memzero_explicit+0x12/0x20
[ 1959.261816]  [<ffffffffa000a0c3>] dm_ctl_ioctl+0x13/0x20 [dm_mod]
[ 1959.268615]  [<ffffffff81215eb6>] do_vfs_ioctl+0xa6/0x5c0
[ 1959.274637]  [<ffffffff81120d2f>] ? __audit_syscall_entry+0xaf/0x100
[ 1959.281726]  [<ffffffff81003176>] ? do_audit_syscall_entry+0x66/0x70
[ 1959.288814]  [<ffffffff81216449>] SyS_ioctl+0x79/0x90
[ 1959.294450]  [<ffffffff8167e4ae>] entry_SYSCALL_64_fastpath+0x12/0x71
...
[ 1959.323277] RIP  [<ffffffffa040efba>] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot]
[ 1959.333090]  RSP <ffff88032a957b30>
[ 1959.336978] CR2: 0000000000000098
[ 1959.344121] ---[ end trace b049991ccad1169e ]---

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1195899
Signed-off-by: Ding Xiang <dingxiang@huawei.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-snap.c          |  9 +++++++++
 drivers/md/dm-table.c         | 36 +++++++++++++++++++++++------------
 include/linux/device-mapper.h |  2 ++
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 61f184ad081c..e108deebbaaa 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1106,6 +1106,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	int i;
 	int r = -EINVAL;
 	char *origin_path, *cow_path;
+	dev_t origin_dev, cow_dev;
 	unsigned args_used, num_flush_bios = 1;
 	fmode_t origin_mode = FMODE_READ;
 
@@ -1136,11 +1137,19 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ti->error = "Cannot get origin device";
 		goto bad_origin;
 	}
+	origin_dev = s->origin->bdev->bd_dev;
 
 	cow_path = argv[0];
 	argv++;
 	argc--;
 
+	cow_dev = dm_get_dev_t(cow_path);
+	if (cow_dev && cow_dev == origin_dev) {
+		ti->error = "COW device cannot be the same as origin device";
+		r = -EINVAL;
+		goto bad_cow;
+	}
+
 	r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
 	if (r) {
 		ti->error = "Cannot get COW device";
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 061152a43730..cb5d0daf53bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -364,6 +364,26 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 	return 0;
 }
 
+/*
+ * Convert the path to a device
+ */
+dev_t dm_get_dev_t(const char *path)
+{
+	dev_t uninitialized_var(dev);
+	struct block_device *bdev;
+
+	bdev = lookup_bdev(path);
+	if (IS_ERR(bdev))
+		dev = name_to_dev_t(path);
+	else {
+		dev = bdev->bd_dev;
+		bdput(bdev);
+	}
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(dm_get_dev_t);
+
 /*
  * Add a device to the list, or just increment the usage count if
  * it's already present.
@@ -372,23 +392,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 		  struct dm_dev **result)
 {
 	int r;
-	dev_t uninitialized_var(dev);
+	dev_t dev;
 	struct dm_dev_internal *dd;
 	struct dm_table *t = ti->table;
-	struct block_device *bdev;
 
 	BUG_ON(!t);
 
-	/* convert the path to a device */
-	bdev = lookup_bdev(path);
-	if (IS_ERR(bdev)) {
-		dev = name_to_dev_t(path);
-		if (!dev)
-			return -ENODEV;
-	} else {
-		dev = bdev->bd_dev;
-		bdput(bdev);
-	}
+	dev = dm_get_dev_t(path);
+	if (!dev)
+		return -ENODEV;
 
 	dd = find_device(&t->devices, dev);
 	if (!dd) {
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ec1c61c87d89..899ab9f8549e 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -124,6 +124,8 @@ struct dm_dev {
 	char name[16];
 };
 
+dev_t dm_get_dev_t(const char *path);
+
 /*
  * Constructors should call these functions to ensure destination devices
  * are opened/closed correctly.

From 5504a47088034573d0839120751b1aec46204aab Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 5 Feb 2016 08:49:01 -0500
Subject: [PATCH 147/797] dm: fix excessive dm-mq context switching

commit 6acfe68bac7e6f16dc312157b1fa6e2368985013 upstream.

Request-based DM's blk-mq support (dm-mq) was reported to be 50% slower
than if an underlying null_blk device were used directly.  One of the
reasons for this drop in performance is that blk_insert_clone_request()
was calling blk_mq_insert_request() with @async=true.  This forced the
use of kblockd_schedule_delayed_work_on() to run the blk-mq hw queues
which ushered in ping-ponging between process context (fio in this case)
and kblockd's kworker to submit the cloned request.  The ftrace
function_graph tracer showed:

  kworker-2013  =>   fio-12190
  fio-12190    =>  kworker-2013
  ...
  kworker-2013  =>   fio-12190
  fio-12190    =>  kworker-2013
  ...

Fixing blk_insert_clone_request()'s blk_mq_insert_request() call to
_not_ use kblockd to submit the cloned requests isn't enough to
eliminate the observed context switches.

In addition to this dm-mq specific blk-core fix, there are 2 DM core
fixes to dm-mq that (when paired with the blk-core fix) completely
eliminate the observed context switching:

1)  don't blk_mq_run_hw_queues in blk-mq request completion

    Motivated by desire to reduce overhead of dm-mq, punting to kblockd
    just increases context switches.

    In my testing against a really fast null_blk device there was no benefit
    to running blk_mq_run_hw_queues() on completion (and no other blk-mq
    driver does this).  So hopefully this change doesn't induce the need for
    yet another revert like commit 621739b00e16ca2d !

2)  use blk_mq_complete_request() in dm_complete_request()

    blk_complete_request() doesn't offer the traditional q->mq_ops vs
    .request_fn branching pattern that other historic block interfaces
    do (e.g. blk_get_request).  Using blk_mq_complete_request() for
    blk-mq requests is important for performance.  It should be noted
    that, like blk_complete_request(), blk_mq_complete_request() doesn't
    natively handle partial completions -- but the request-based
    DM-multipath target does provide the required partial completion
    support by dm.c:end_clone_bio() triggering requeueing of the request
    via dm-mpath.c:multipath_end_io()'s return of DM_ENDIO_REQUEUE.

dm-mq fix #2 is _much_ more important than #1 for eliminating the
context switches.
Before: cpu          : usr=15.10%, sys=59.39%, ctx=7905181, majf=0, minf=475
After:  cpu          : usr=20.60%, sys=79.35%, ctx=2008, majf=0, minf=472

With these changes multithreaded async read IOPs improved from ~950K
to ~1350K for this dm-mq stacked on null_blk test-case.  The raw read
IOPs of the underlying null_blk device for the same workload is ~1950K.

Fixes: 7fb4898e0 ("block: add blk-mq support to blk_insert_cloned_request()")
Fixes: bfebd1cdb ("dm: add full blk-mq support to request-based DM")
Reported-by: Sagi Grimberg <sagig@dev.mellanox.co.il>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-core.c |  2 +-
 drivers/md/dm.c  | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 33e2f62d5062..f8e64cac981a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2189,7 +2189,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
-		blk_mq_insert_request(rq, false, true, true);
+		blk_mq_insert_request(rq, false, true, false);
 		return 0;
 	}
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dd834927bc66..887c6a11885b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1109,12 +1109,8 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	 * back into ->request_fn() could deadlock attempting to grab the
 	 * queue lock again.
 	 */
-	if (run_queue) {
-		if (md->queue->mq_ops)
-			blk_mq_run_hw_queues(md->queue, true);
-		else
-			blk_run_queue_async(md->queue);
-	}
+	if (!md->queue->mq_ops && run_queue)
+		blk_run_queue_async(md->queue);
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
@@ -1336,7 +1332,10 @@ static void dm_complete_request(struct request *rq, int error)
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
-	blk_complete_request(rq);
+	if (!rq->q->mq_ops)
+		blk_complete_request(rq);
+	else
+		blk_mq_complete_request(rq, error);
 }
 
 /*

From 291e2b3900da45dcc9c58e264f960fcb822bd07a Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 1 Mar 2016 10:58:44 +0000
Subject: [PATCH 148/797] dm thin metadata: don't issue prefetches if a
 transaction abort has failed

commit 2eae9e4489b4cf83213fa3bd508b5afca3f01780 upstream.

If a transaction abort has failed then we can no longer use the metadata
device.  Typically this happens if the superblock is unreadable.

This fix addresses a crash seen during metadata device failure testing.

Fixes: 8a01a6af75 ("dm thin: prefetch missing metadata pages")
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-thin-metadata.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index c219a053c7f6..911ada643364 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1943,5 +1943,8 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd)
 
 void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd)
 {
-	dm_tm_issue_prefetches(pmd->tm);
+	down_read(&pmd->root_lock);
+	if (!pmd->fail_io)
+		dm_tm_issue_prefetches(pmd->tm);
+	up_read(&pmd->root_lock);
 }

From 7f47aea487df2dc281c7f64ff7430aff3b260af0 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 10 Mar 2016 16:20:58 +0000
Subject: [PATCH 149/797] dm cache: make sure every metadata function checks
 fail_io

commit d14fcf3dd79c0b8a8d0ba469c44a6b04f3a1403b upstream.

Otherwise operations may be attempted that will only ever go on to crash
(since the metadata device is either missing or unreliable if 'fail_io'
is set).

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-cache-metadata.c | 98 ++++++++++++++++++++--------------
 drivers/md/dm-cache-metadata.h |  4 +-
 drivers/md/dm-cache-target.c   | 12 ++++-
 3 files changed, 71 insertions(+), 43 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index f6543f3a970f..27f2ef300f8b 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -867,19 +867,40 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
 	return 0;
 }
 
-#define WRITE_LOCK(cmd) \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \
+#define WRITE_LOCK(cmd)	\
+	down_write(&cmd->root_lock); \
+	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
+		up_write(&cmd->root_lock); \
 		return -EINVAL; \
-	down_write(&cmd->root_lock)
+	}
 
 #define WRITE_LOCK_VOID(cmd) \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \
+	down_write(&cmd->root_lock); \
+	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
+		up_write(&cmd->root_lock); \
 		return; \
-	down_write(&cmd->root_lock)
+	}
 
 #define WRITE_UNLOCK(cmd) \
 	up_write(&cmd->root_lock)
 
+#define READ_LOCK(cmd) \
+	down_read(&cmd->root_lock); \
+	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
+		up_read(&cmd->root_lock); \
+		return -EINVAL; \
+	}
+
+#define READ_LOCK_VOID(cmd)	\
+	down_read(&cmd->root_lock); \
+	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
+		up_read(&cmd->root_lock); \
+		return; \
+	}
+
+#define READ_UNLOCK(cmd) \
+	up_read(&cmd->root_lock)
+
 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
 {
 	int r;
@@ -1015,22 +1036,20 @@ int dm_cache_load_discards(struct dm_cache_metadata *cmd,
 {
 	int r;
 
-	down_read(&cmd->root_lock);
+	READ_LOCK(cmd);
 	r = __load_discards(cmd, fn, context);
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 
 	return r;
 }
 
-dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd)
+int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
 {
-	dm_cblock_t r;
+	READ_LOCK(cmd);
+	*result = cmd->cache_blocks;
+	READ_UNLOCK(cmd);
 
-	down_read(&cmd->root_lock);
-	r = cmd->cache_blocks;
-	up_read(&cmd->root_lock);
-
-	return r;
+	return 0;
 }
 
 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
@@ -1188,9 +1207,9 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
 {
 	int r;
 
-	down_read(&cmd->root_lock);
+	READ_LOCK(cmd);
 	r = __load_mappings(cmd, policy, fn, context);
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 
 	return r;
 }
@@ -1215,18 +1234,18 @@ static int __dump_mappings(struct dm_cache_metadata *cmd)
 
 void dm_cache_dump(struct dm_cache_metadata *cmd)
 {
-	down_read(&cmd->root_lock);
+	READ_LOCK_VOID(cmd);
 	__dump_mappings(cmd);
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 }
 
 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
 {
 	int r;
 
-	down_read(&cmd->root_lock);
+	READ_LOCK(cmd);
 	r = cmd->changed;
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 
 	return r;
 }
@@ -1276,9 +1295,9 @@ int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
 				 struct dm_cache_statistics *stats)
 {
-	down_read(&cmd->root_lock);
+	READ_LOCK_VOID(cmd);
 	*stats = cmd->stats;
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 }
 
 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
@@ -1312,9 +1331,9 @@ int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
 {
 	int r = -EINVAL;
 
-	down_read(&cmd->root_lock);
+	READ_LOCK(cmd);
 	r = dm_sm_get_nr_free(cmd->metadata_sm, result);
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 
 	return r;
 }
@@ -1324,9 +1343,9 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
 {
 	int r = -EINVAL;
 
-	down_read(&cmd->root_lock);
+	READ_LOCK(cmd);
 	r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
-	up_read(&cmd->root_lock);
+	READ_UNLOCK(cmd);
 
 	return r;
 }
@@ -1417,7 +1436,13 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *
 
 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
 {
-	return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
+	int r;
+
+	READ_LOCK(cmd);
+	r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
+	READ_UNLOCK(cmd);
+
+	return r;
 }
 
 void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
@@ -1440,10 +1465,7 @@ int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
 	struct dm_block *sblock;
 	struct cache_disk_superblock *disk_super;
 
-	/*
-	 * We ignore fail_io for this function.
-	 */
-	down_write(&cmd->root_lock);
+	WRITE_LOCK(cmd);
 	set_bit(NEEDS_CHECK, &cmd->flags);
 
 	r = superblock_lock(cmd, &sblock);
@@ -1458,19 +1480,17 @@ int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
 	dm_bm_unlock(sblock);
 
 out:
-	up_write(&cmd->root_lock);
+	WRITE_UNLOCK(cmd);
 	return r;
 }
 
-bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd)
+int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
 {
-	bool needs_check;
+	READ_LOCK(cmd);
+	*result = !!test_bit(NEEDS_CHECK, &cmd->flags);
+	READ_UNLOCK(cmd);
 
-	down_read(&cmd->root_lock);
-	needs_check = !!test_bit(NEEDS_CHECK, &cmd->flags);
-	up_read(&cmd->root_lock);
-
-	return needs_check;
+	return 0;
 }
 
 int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 2ffee21f318d..8528744195e5 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -66,7 +66,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd);
  * origin blocks to map to.
  */
 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size);
-dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd);
+int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result);
 
 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
 				   sector_t discard_block_size,
@@ -137,7 +137,7 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *
  */
 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result);
 
-bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd);
+int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result);
 int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd);
 void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd);
 void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2fd4c8296144..515f83e7d9ab 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -987,9 +987,14 @@ static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mod
 
 static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
 {
-	bool needs_check = dm_cache_metadata_needs_check(cache->cmd);
+	bool needs_check;
 	enum cache_metadata_mode old_mode = get_cache_mode(cache);
 
+	if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
+		DMERR("unable to read needs_check flag, setting failure mode");
+		new_mode = CM_FAIL;
+	}
+
 	if (new_mode == CM_WRITE && needs_check) {
 		DMERR("%s: unable to switch cache to write mode until repaired.",
 		      cache_device_name(cache));
@@ -3513,6 +3518,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 	char buf[BDEVNAME_SIZE];
 	struct cache *cache = ti->private;
 	dm_cblock_t residency;
+	bool needs_check;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -3586,7 +3592,9 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 		else
 			DMEMIT("rw ");
 
-		if (dm_cache_metadata_needs_check(cache->cmd))
+		r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
+
+		if (r || needs_check)
 			DMEMIT("needs_check ");
 		else
 			DMEMIT("- ");

From 8907d8a6fd3f21992283efd67002aea719396f2b Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <bmr@redhat.com>
Date: Mon, 14 Mar 2016 17:04:34 -0400
Subject: [PATCH 150/797] dm: fix rq_end_stats() NULL pointer in
 dm_requeue_original_request()

commit 98dbc9c6c61698792e3a66f32f3bf066201d42d7 upstream.

An "old" (.request_fn) DM 'struct request' stores a pointer to the
associated 'struct dm_rq_target_io' in rq->special.

dm_requeue_original_request(), previously named
dm_requeue_unmapped_original_request(), called dm_unprep_request() to
reset rq->special to NULL.  But rq_end_stats() would go on to hit a NULL
pointer dereference because its call to tio_from_request() returned NULL.

Fix this by calling rq_end_stats() _before_ dm_unprep_request()

Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Fixes: e262f34741 ("dm stats: add support for request-based DM devices")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 887c6a11885b..c338aebb4ccd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1210,9 +1210,9 @@ static void dm_requeue_original_request(struct mapped_device *md,
 {
 	int rw = rq_data_dir(rq);
 
+	rq_end_stats(md, rq);
 	dm_unprep_request(rq);
 
-	rq_end_stats(md, rq);
 	if (!rq->q->mq_ops)
 		old_requeue_request(rq);
 	else {

From 951822beba268f76af9463c69095284df20311f6 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Wed, 10 Feb 2016 11:33:18 +0100
Subject: [PATCH 151/797] usb: retry reset if a device times out

commit 264904ccc33c604d4b3141bbd33808152dfac45b upstream.

Some devices I got show an inability to operate right after
power on if they are already connected. They are beyond recovery
if the descriptors are requested multiple times. So in case of
a timeout we rather bail early and reset again. But it must be
done only on the first loop lest we get into a reset/time out
spiral that can be overcome with a retry.

This patch is a rework of a patch that fell through the cracks.
http://www.spinics.net/lists/linux-usb/msg103263.html

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 1560f3f3e756..a44db86018c7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4426,7 +4426,13 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 						r = -EPROTO;
 					break;
 				}
-				if (r == 0)
+				/*
+				 * Some devices time out if they are powered on
+				 * when already connected. They need a second
+				 * reset. But only on the first attempt,
+				 * lest we get into a time out/reset loop
+				 */
+				if (r == 0  || (r == -ETIMEDOUT && j == 0))
 					break;
 			}
 			udev->descriptor.bMaxPacketSize0 =

From aa563cf3bc7ef570f449501ff7ab12f3b7080ff0 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Wed, 17 Feb 2016 11:52:43 +0100
Subject: [PATCH 152/797] usb: hub: fix a typo in hub_port_init() leading to
 wrong logic

commit 0d5ce778c43bf888328231bcdce05d5c860655aa upstream.

A typo of j for i led to a logic bug. To rule out future
confusion, the variable names are made meaningful.

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index a44db86018c7..2a274884c7ea 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4277,7 +4277,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 {
 	struct usb_device	*hdev = hub->hdev;
 	struct usb_hcd		*hcd = bus_to_hcd(hdev->bus);
-	int			i, j, retval;
+	int			retries, operations, retval, i;
 	unsigned		delay = HUB_SHORT_RESET_TIME;
 	enum usb_device_speed	oldspeed = udev->speed;
 	const char		*speed;
@@ -4379,7 +4379,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 	 * first 8 bytes of the device descriptor to get the ep0 maxpacket
 	 * value.
 	 */
-	for (i = 0; i < GET_DESCRIPTOR_TRIES; (++i, msleep(100))) {
+	for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) {
 		bool did_new_scheme = false;
 
 		if (use_new_scheme(udev, retry_counter)) {
@@ -4406,7 +4406,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 			 * 255 is for WUSB devices, we actually need to use
 			 * 512 (WUSB1.0[4.8.1]).
 			 */
-			for (j = 0; j < 3; ++j) {
+			for (operations = 0; operations < 3; ++operations) {
 				buf->bMaxPacketSize0 = 0;
 				r = usb_control_msg(udev, usb_rcvaddr0pipe(),
 					USB_REQ_GET_DESCRIPTOR, USB_DIR_IN,
@@ -4432,7 +4432,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 				 * reset. But only on the first attempt,
 				 * lest we get into a time out/reset loop
 				 */
-				if (r == 0  || (r == -ETIMEDOUT && j == 0))
+				if (r == 0  || (r == -ETIMEDOUT && retries == 0))
 					break;
 			}
 			udev->descriptor.bMaxPacketSize0 =
@@ -4464,7 +4464,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 		 * authorization will assign the final address.
 		 */
 		if (udev->wusb == 0) {
-			for (j = 0; j < SET_ADDRESS_TRIES; ++j) {
+			for (operations = 0; operations < SET_ADDRESS_TRIES; ++operations) {
 				retval = hub_set_address(udev, devnum);
 				if (retval >= 0)
 					break;

From a85722c650265714c592d7ef99d277106dbb17bb Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 7 Mar 2016 20:11:52 +0100
Subject: [PATCH 153/797] USB: uas: Reduce can_queue to MAX_CMNDS

commit 55ff8cfbc4e12a7d2187df523938cc671fbebdd1 upstream.

The uas driver can never queue more than MAX_CMNDS (- 1) tags and tags
are shared between luns, so there is no need to claim that we can_queue
some random large number.

Not claiming that we can_queue 65536 commands, fixes the uas driver
failing to initialize while allocating the tag map with a "Page allocation
failure (order 7)" error on systems which have been running for a while
and thus have fragmented memory.

Reported-and-tested-by: Yves-Alexis Perez <corsac@corsac.net>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/uas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 5c66d3f7a6d0..ce0cd6e20d4f 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -812,7 +812,7 @@ static struct scsi_host_template uas_host_template = {
 	.slave_configure = uas_slave_configure,
 	.eh_abort_handler = uas_eh_abort_handler,
 	.eh_bus_reset_handler = uas_eh_bus_reset_handler,
-	.can_queue = 65536,	/* Is there a limit on the _host_ ? */
+	.can_queue = MAX_CMNDS,
 	.this_id = -1,
 	.sg_tablesize = SG_NONE,
 	.skip_settle_delay = 1,

From 1ea680abf7640c777396909102bc22915107cb5b Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 15 Mar 2016 10:14:04 +0100
Subject: [PATCH 154/797] USB: cdc-acm: more sanity checking

commit 8835ba4a39cf53f705417b3b3a94eb067673f2c9 upstream.

An attack has become available which pretends to be a quirky
device circumventing normal sanity checks and crashes the kernel
by an insufficient number of interfaces. This patch adds a check
to the code path for quirky devices.

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index fa4e23930614..d37fdcc3143c 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1114,6 +1114,9 @@ static int acm_probe(struct usb_interface *intf,
 	if (quirks == NO_UNION_NORMAL) {
 		data_interface = usb_ifnum_to_if(usb_dev, 1);
 		control_interface = usb_ifnum_to_if(usb_dev, 0);
+		/* we would crash */
+		if (!data_interface || !control_interface)
+			return -ENODEV;
 		goto skip_normal_probe;
 	}
 

From 850631bedd3cb7f79cb32a456c9ad3a5f6e1d1f3 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@fedoraproject.org>
Date: Mon, 14 Mar 2016 10:42:38 -0400
Subject: [PATCH 155/797] USB: iowarrior: fix oops with malicious USB
 descriptors

commit 4ec0ef3a82125efc36173062a50624550a900ae0 upstream.

The iowarrior driver expects at least one valid endpoint.  If given
malicious descriptors that specify 0 for the number of endpoints,
it will crash in the probe function.  Ensure there is at least
one endpoint on the interface before using it.

The full report of this issue can be found here:
http://seclists.org/bugtraq/2016/Mar/87

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/iowarrior.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index c6bfd13f6c92..1950e87b4219 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -787,6 +787,12 @@ static int iowarrior_probe(struct usb_interface *interface,
 	iface_desc = interface->cur_altsetting;
 	dev->product_id = le16_to_cpu(udev->descriptor.idProduct);
 
+	if (iface_desc->desc.bNumEndpoints < 1) {
+		dev_err(&interface->dev, "Invalid number of endpoints\n");
+		retval = -EINVAL;
+		goto error;
+	}
+
 	/* set up the endpoint information */
 	for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
 		endpoint = &iface_desc->endpoint[i].desc;

From b6c6426252e2653407811f46c883661955b9f5fa Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Wed, 16 Mar 2016 13:26:17 +0100
Subject: [PATCH 156/797] USB: usb_driver_claim_interface: add sanity checking

commit 0b818e3956fc1ad976bee791eadcbb3b5fec5bfd upstream.

Attacks that trick drivers into passing a NULL pointer
to usb_driver_claim_interface() using forged descriptors are
known. This thwarts them by sanity checking.

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/driver.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 56593a9a8726..2057d91d8336 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -502,11 +502,15 @@ static int usb_unbind_interface(struct device *dev)
 int usb_driver_claim_interface(struct usb_driver *driver,
 				struct usb_interface *iface, void *priv)
 {
-	struct device *dev = &iface->dev;
+	struct device *dev;
 	struct usb_device *udev;
 	int retval = 0;
 	int lpm_disable_error;
 
+	if (!iface)
+		return -ENODEV;
+
+	dev = &iface->dev;
 	if (dev->driver)
 		return -EBUSY;
 

From 9deac9454b7a5643a09829f4731276cea6697b72 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 31 Mar 2016 12:04:24 -0400
Subject: [PATCH 157/797] USB: mct_u232: add sanity checking in probe

commit 4e9a0b05257f29cf4b75f3209243ed71614d062e upstream.

An attack using the lack of sanity checking in probe is known. This
patch checks for the existence of a second port.

CVE-2016-3136

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
[johan: add error message ]
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/mct_u232.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index fd707d6a10e2..89726f702202 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -376,14 +376,21 @@ static void mct_u232_msr_to_state(struct usb_serial_port *port,
 
 static int mct_u232_port_probe(struct usb_serial_port *port)
 {
+	struct usb_serial *serial = port->serial;
 	struct mct_u232_private *priv;
 
+	/* check first to simplify error handling */
+	if (!serial->port[1] || !serial->port[1]->interrupt_in_urb) {
+		dev_err(&port->dev, "expected endpoint missing\n");
+		return -ENODEV;
+	}
+
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
 	/* Use second interrupt-in endpoint for reading. */
-	priv->read_urb = port->serial->port[1]->interrupt_in_urb;
+	priv->read_urb = serial->port[1]->interrupt_in_urb;
 	priv->read_urb->context = port;
 
 	spin_lock_init(&priv->lock);

From 4f6ad5b0d28c84030693fe21b308c0b711fa66f6 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 31 Mar 2016 12:04:26 -0400
Subject: [PATCH 158/797] USB: digi_acceleport: do sanity checking for the
 number of ports

commit 5a07975ad0a36708c6b0a5b9fea1ff811d0b0c1f upstream.

The driver can be crashed with devices that expose crafted descriptors
with too few endpoints.

See: http://seclists.org/bugtraq/2016/Mar/61

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
[johan: fix OOB endpoint check and add error messages ]
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/digi_acceleport.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 12b0e67473ba..3df7b7ec178e 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1251,8 +1251,27 @@ static int digi_port_init(struct usb_serial_port *port, unsigned port_num)
 
 static int digi_startup(struct usb_serial *serial)
 {
+	struct device *dev = &serial->interface->dev;
 	struct digi_serial *serial_priv;
 	int ret;
+	int i;
+
+	/* check whether the device has the expected number of endpoints */
+	if (serial->num_port_pointers < serial->type->num_ports + 1) {
+		dev_err(dev, "OOB endpoints missing\n");
+		return -ENODEV;
+	}
+
+	for (i = 0; i < serial->type->num_ports + 1 ; i++) {
+		if (!serial->port[i]->read_urb) {
+			dev_err(dev, "bulk-in endpoint missing\n");
+			return -ENODEV;
+		}
+		if (!serial->port[i]->write_urb) {
+			dev_err(dev, "bulk-out endpoint missing\n");
+			return -ENODEV;
+		}
+	}
 
 	serial_priv = kzalloc(sizeof(*serial_priv), GFP_KERNEL);
 	if (!serial_priv)

From ca76906a7753052b00e491ba017393f9071b0406 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 31 Mar 2016 12:04:25 -0400
Subject: [PATCH 159/797] USB: cypress_m8: add endpoint sanity check

commit c55aee1bf0e6b6feec8b2927b43f7a09a6d5f754 upstream.

An attack using missing endpoints exists.

CVE-2016-3137

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cypress_m8.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index 01bf53392819..244acb1299a9 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -447,6 +447,11 @@ static int cypress_generic_port_probe(struct usb_serial_port *port)
 	struct usb_serial *serial = port->serial;
 	struct cypress_private *priv;
 
+	if (!port->interrupt_out_urb || !port->interrupt_in_urb) {
+		dev_err(&port->dev, "required endpoint is missing\n");
+		return -ENODEV;
+	}
+
 	priv = kzalloc(sizeof(struct cypress_private), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -606,12 +611,6 @@ static int cypress_open(struct tty_struct *tty, struct usb_serial_port *port)
 		cypress_set_termios(tty, port, &priv->tmp_termios);
 
 	/* setup the port and start reading from the device */
-	if (!port->interrupt_in_urb) {
-		dev_err(&port->dev, "%s - interrupt_in_urb is empty!\n",
-			__func__);
-		return -1;
-	}
-
 	usb_fill_int_urb(port->interrupt_in_urb, serial->dev,
 		usb_rcvintpipe(serial->dev, port->interrupt_in_endpointAddress),
 		port->interrupt_in_urb->transfer_buffer,

From df3dddcc643832fc4fd133b8956be64b37841b67 Mon Sep 17 00:00:00 2001
From: Martyn Welch <martyn.welch@collabora.co.uk>
Date: Tue, 29 Mar 2016 17:47:29 +0100
Subject: [PATCH 160/797] USB: serial: cp210x: Adding GE Healthcare Device ID

commit cddc9434e3dcc37a85c4412fb8e277d3a582e456 upstream.

The CP2105 is used in the GE Healthcare Remote Alarm Box, with the
Manufacturer ID of 0x1901 and Product ID of 0x0194.

Signed-off-by: Martyn Welch <martyn.welch@collabora.co.uk>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 7a76fe4c2f9e..bdc0f2f24f19 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -164,6 +164,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
 	{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
 	{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
+	{ USB_DEVICE(0x1901, 0x0194) },	/* GE Healthcare Remote Alarm Box */
 	{ USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
 	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */

From b3e0983cb9fc0b1e876fd51f186f8fa887089261 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@fedoraproject.org>
Date: Thu, 10 Mar 2016 09:48:52 -0500
Subject: [PATCH 161/797] USB: serial: ftdi_sio: Add support for ICP DAS
 I-756xU devices

commit ea6db90e750328068837bed34cb1302b7a177339 upstream.

A Fedora user reports that the ftdi_sio driver works properly for the
ICP DAS I-7561U device.  Further, the user manual for these devices
instructs users to load the driver and add the ids using the sysfs
interface.

Add support for these in the driver directly so that the devices work
out of the box instead of needing manual configuration.

Reported-by: <thesource@mail.ru>
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 4 ++++
 drivers/usb/serial/ftdi_sio_ids.h | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 8c660ae401d8..b61f12160d37 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1004,6 +1004,10 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) },
 	{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) },
 	{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) },
+	/* ICP DAS I-756xU devices */
+	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
+	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
+	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
 	{ }					/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index a84df2513994..c5d6c1e73e8e 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -871,6 +871,14 @@
 #define NOVITUS_VID			0x1a28
 #define NOVITUS_BONO_E_PID		0x6010
 
+/*
+ * ICPDAS I-756*U devices
+ */
+#define ICPDAS_VID			0x1b5c
+#define ICPDAS_I7560U_PID		0x0103
+#define ICPDAS_I7561U_PID		0x0104
+#define ICPDAS_I7563U_PID		0x0105
+
 /*
  * RT Systems programming cables for various ham radios
  */

From 49102971470cc282f50f674b4aa5c4adabeb281d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 7 Apr 2016 12:09:17 +0200
Subject: [PATCH 162/797] USB: option: add "D-Link DWM-221 B1" device id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d48d5691ebf88a15d95ba96486917ffc79256536 upstream.

Thomas reports:
"Windows:

00 diagnostics
01 modem
02 at-port
03 nmea
04 nic

Linux:

T:  Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#=  4 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=2001 ProdID=7e19 Rev=02.32
S:  Manufacturer=Mobile Connect
S:  Product=Mobile Connect
S:  SerialNumber=0123456789ABCDEF
C:  #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
I:  If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage"

Reported-by: Thomas Schäfer <tschaefer@t-online.de>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 348e19834b83..c6f497f16526 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1818,6 +1818,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) },
+	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff),			/* D-Link DWM-221 B1 */
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) },                /* OLICARD300 - MT6225 */

From 5cede226daa83e26c4ef21773a75f535927b935d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 22 Jan 2016 08:53:55 -0200
Subject: [PATCH 163/797] pwc: Add USB id for Philips Spc880nc webcam

commit 7445e45d19a09e5269dc85f17f9635be29d2f76c upstream.

SPC 880NC PC camera discussions:
	http://www.pclinuxos.com/forum/index.php/topic,135688.0.html

Reported-by: Kikim <klucznik0@op.pl>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/pwc/pwc-if.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
index b79c36fd8cd2..58f23bcfe94e 100644
--- a/drivers/media/usb/pwc/pwc-if.c
+++ b/drivers/media/usb/pwc/pwc-if.c
@@ -91,6 +91,7 @@ static const struct usb_device_id pwc_device_table [] = {
 	{ USB_DEVICE(0x0471, 0x0312) },
 	{ USB_DEVICE(0x0471, 0x0313) }, /* the 'new' 720K */
 	{ USB_DEVICE(0x0471, 0x0329) }, /* Philips SPC 900NC PC Camera */
+	{ USB_DEVICE(0x0471, 0x032C) }, /* Philips SPC 880NC PC Camera */
 	{ USB_DEVICE(0x069A, 0x0001) }, /* Askey */
 	{ USB_DEVICE(0x046D, 0x08B0) }, /* Logitech QuickCam Pro 3000 */
 	{ USB_DEVICE(0x046D, 0x08B1) }, /* Logitech QuickCam Notebook Pro */
@@ -811,6 +812,11 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id
 			name = "Philips SPC 900NC webcam";
 			type_id = 740;
 			break;
+		case 0x032C:
+			PWC_INFO("Philips SPC 880NC USB webcam detected.\n");
+			name = "Philips SPC 880NC webcam";
+			type_id = 740;
+			break;
 		default:
 			return -ENODEV;
 			break;

From fbd40d7beef0b17624bc1f838f4d44dfa4b0326b Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@fedoraproject.org>
Date: Mon, 14 Mar 2016 09:33:40 -0700
Subject: [PATCH 164/797] Input: powermate - fix oops with malicious USB
 descriptors

commit 9c6ba456711687b794dcf285856fc14e2c76074f upstream.

The powermate driver expects at least one valid USB endpoint in its
probe function.  If given malicious descriptors that specify 0 for
the number of endpoints, it will crash.  Validate the number of
endpoints on the interface before using them.

The full report for this issue can be found here:
http://seclists.org/bugtraq/2016/Mar/85

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/misc/powermate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c
index 63b539d3daba..84909a12ff36 100644
--- a/drivers/input/misc/powermate.c
+++ b/drivers/input/misc/powermate.c
@@ -307,6 +307,9 @@ static int powermate_probe(struct usb_interface *intf, const struct usb_device_i
 	int error = -ENOMEM;
 
 	interface = intf->cur_altsetting;
+	if (interface->desc.bNumEndpoints < 1)
+		return -EINVAL;
+
 	endpoint = &interface->endpoint[0].desc;
 	if (!usb_endpoint_is_int_in(endpoint))
 		return -EIO;

From 57f6ad5f1580a5a06c573fb15ed6dcf701e037f6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 15 Mar 2016 12:09:10 +0100
Subject: [PATCH 165/797] ALSA: usb-audio: Fix NULL dereference in
 create_fixed_stream_quirk()

commit 0f886ca12765d20124bd06291c82951fd49a33be upstream.

create_fixed_stream_quirk() may cause a NULL-pointer dereference by
accessing the non-existing endpoint when a USB device with a malformed
USB descriptor is used.

This patch avoids it simply by adding a sanity check of bNumEndpoints
before the accesses.

Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=971125
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index c458d60d5030..f2e4eebdf76d 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -180,6 +180,12 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip,
 	}
 	alts = &iface->altsetting[fp->altset_idx];
 	altsd = get_iface_desc(alts);
+	if (altsd->bNumEndpoints < 1) {
+		kfree(fp);
+		kfree(rate_table);
+		return -EINVAL;
+	}
+
 	fp->protocol = altsd->bInterfaceProtocol;
 
 	if (fp->datainterval == 0)

From f9f026d3903957ffe515ab47ea5df421dafc47cb Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 15 Mar 2016 15:20:58 +0100
Subject: [PATCH 166/797] ALSA: usb-audio: Add sanity checks for endpoint
 accesses

commit 447d6275f0c21f6cc97a88b3a0c601436a4cdf2a upstream.

Add some sanity checks before actually accessing the endpoint via
get_endpoint() in order to avoid the invalid access through a
malformed USB descriptor.  Mostly just checking bNumEndpoints, but in
one place (snd_microii_spdif_default_get()), the validity of iface and
altsetting index is checked as well.

Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=971125
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/clock.c        | 2 ++
 sound/usb/endpoint.c     | 3 +++
 sound/usb/mixer_quirks.c | 4 ++++
 sound/usb/pcm.c          | 2 ++
 4 files changed, 11 insertions(+)

diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 2ed260b10f6d..7ccbcaf6a147 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -285,6 +285,8 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, int iface,
 	unsigned char data[3];
 	int err, crate;
 
+	if (get_iface_desc(alts)->bNumEndpoints < 1)
+		return -EINVAL;
 	ep = get_endpoint(alts, 0)->bEndpointAddress;
 
 	/* if endpoint doesn't have sampling rate control, bail out */
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 7b1cb365ffab..c07a7eda42a2 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -438,6 +438,9 @@ static void snd_complete_urb(struct urb *urb)
  *
  * New endpoints will be added to chip->ep_list and must be freed by
  * calling snd_usb_endpoint_free().
+ *
+ * For SND_USB_ENDPOINT_TYPE_SYNC, the caller needs to guarantee that
+ * bNumEndpoints > 1 beforehand.
  */
 struct snd_usb_endpoint *snd_usb_add_endpoint(struct snd_usb_audio *chip,
 					      struct usb_host_interface *alts,
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 279025650568..f6c3bf79af9a 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -1519,7 +1519,11 @@ static int snd_microii_spdif_default_get(struct snd_kcontrol *kcontrol,
 
 	/* use known values for that card: interface#1 altsetting#1 */
 	iface = usb_ifnum_to_if(chip->dev, 1);
+	if (!iface || iface->num_altsetting < 2)
+		return -EINVAL;
 	alts = &iface->altsetting[1];
+	if (get_iface_desc(alts)->bNumEndpoints < 1)
+		return -EINVAL;
 	ep = get_endpoint(alts, 0)->bEndpointAddress;
 
 	err = snd_usb_ctl_msg(chip->dev,
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 9245f52d43bd..44d178ee9177 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -159,6 +159,8 @@ static int init_pitch_v1(struct snd_usb_audio *chip, int iface,
 	unsigned char data[1];
 	int err;
 
+	if (get_iface_desc(alts)->bNumEndpoints < 1)
+		return -EINVAL;
 	ep = get_endpoint(alts, 0)->bEndpointAddress;
 
 	data[0] = 1;

From 94bfaf24e6ba9a789fa0aa50fb4b7d228f8d3cff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Cl=C3=A9ment?= <victor.clement@openmailbox.org>
Date: Sat, 19 Mar 2016 13:17:42 +0100
Subject: [PATCH 167/797] ALSA: usb-audio: add Microsoft HD-5001 to quirks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 0ef21100ae912f76ed89f76ecd894f4ffb3689c1 upstream.

Like the HD-5000, the Microsoft HD-5001 webcam microphone does not
support sample rate reading.
This results in dmesg errors and sound hanging with pulseaudio.

Signed-off-by: Victor Clément <victor.clement@openmailbox.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index f2e4eebdf76d..2967242a5716 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1127,6 +1127,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	switch (chip->usb_id) {
 	case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
 	case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+	case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
 	case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */

From 4d073cfdf7f685628485d9692623c668b173bb60 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 15 Mar 2016 12:14:49 +0100
Subject: [PATCH 168/797] ALSA: usb-audio: Minor code cleanup in
 create_fixed_stream_quirk()

commit 902eb7fd1e4af3ac69b9b30f8373f118c92b9729 upstream.

Just a minor code cleanup: unify the error paths.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 2967242a5716..f9263de8b9a2 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -167,23 +167,18 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip,
 	stream = (fp->endpoint & USB_DIR_IN)
 		? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK;
 	err = snd_usb_add_audio_stream(chip, stream, fp);
-	if (err < 0) {
-		kfree(fp);
-		kfree(rate_table);
-		return err;
-	}
+	if (err < 0)
+		goto error;
 	if (fp->iface != get_iface_desc(&iface->altsetting[0])->bInterfaceNumber ||
 	    fp->altset_idx >= iface->num_altsetting) {
-		kfree(fp);
-		kfree(rate_table);
-		return -EINVAL;
+		err = -EINVAL;
+		goto error;
 	}
 	alts = &iface->altsetting[fp->altset_idx];
 	altsd = get_iface_desc(alts);
 	if (altsd->bNumEndpoints < 1) {
-		kfree(fp);
-		kfree(rate_table);
-		return -EINVAL;
+		err = -EINVAL;
+		goto error;
 	}
 
 	fp->protocol = altsd->bInterfaceProtocol;
@@ -196,6 +191,11 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip,
 	snd_usb_init_pitch(chip, fp->iface, alts, fp);
 	snd_usb_init_sample_rate(chip, fp->iface, alts, fp, fp->rate_max);
 	return 0;
+
+ error:
+	kfree(fp);
+	kfree(rate_table);
+	return err;
 }
 
 static int create_auto_pcm_quirk(struct snd_usb_audio *chip,

From b7f03eeaaf7cdb9528c9710d648182af5a4db493 Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Thu, 31 Mar 2016 12:05:43 -0400
Subject: [PATCH 169/797] ALSA: usb-audio: Fix double-free in error paths after
 snd_usb_add_audio_stream() call

commit 836b34a935abc91e13e63053d0a83b24dfb5ea78 upstream.

create_fixed_stream_quirk(), snd_usb_parse_audio_interface() and
create_uaxx_quirk() functions allocate the audioformat object by themselves
and free it upon error before returning. However, once the object is linked
to a stream, it's freed again in snd_usb_audio_pcm_free(), thus it'll be
double-freed, eventually resulting in a memory corruption.

This patch fixes these failures in the error paths by unlinking the audioformat
object before freeing it.

Based on a patch by Takashi Iwai <tiwai@suse.de>

[Note for stable backports:
 this patch requires the commit 902eb7fd1e4a ('ALSA: usb-audio: Minor
 code cleanup in create_fixed_stream_quirk()')]

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1283358
Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 4 ++++
 sound/usb/stream.c | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index f9263de8b9a2..cd7eac28edee 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -150,6 +150,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip,
 		usb_audio_err(chip, "cannot memdup\n");
 		return -ENOMEM;
 	}
+	INIT_LIST_HEAD(&fp->list);
 	if (fp->nr_rates > MAX_NR_RATES) {
 		kfree(fp);
 		return -EINVAL;
@@ -193,6 +194,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip,
 	return 0;
 
  error:
+	list_del(&fp->list); /* unlink for avoiding double-free */
 	kfree(fp);
 	kfree(rate_table);
 	return err;
@@ -468,6 +470,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip,
 	fp->ep_attr = get_endpoint(alts, 0)->bmAttributes;
 	fp->datainterval = 0;
 	fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize);
+	INIT_LIST_HEAD(&fp->list);
 
 	switch (fp->maxpacksize) {
 	case 0x120:
@@ -491,6 +494,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip,
 		? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK;
 	err = snd_usb_add_audio_stream(chip, stream, fp);
 	if (err < 0) {
+		list_del(&fp->list); /* unlink for avoiding double-free */
 		kfree(fp);
 		return err;
 	}
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index 8ee14f2365e7..3b23102230c0 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -316,7 +316,9 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits,
 /*
  * add this endpoint to the chip instance.
  * if a stream with the same endpoint already exists, append to it.
- * if not, create a new pcm stream.
+ * if not, create a new pcm stream. note, fp is added to the substream
+ * fmt_list and will be freed on the chip instance release. do not free
+ * fp or do remove it from the substream fmt_list to avoid double-free.
  */
 int snd_usb_add_audio_stream(struct snd_usb_audio *chip,
 			     int stream,
@@ -677,6 +679,7 @@ int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no)
 					* (fp->maxpacksize & 0x7ff);
 		fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol, iface_no);
 		fp->clock = clock;
+		INIT_LIST_HEAD(&fp->list);
 
 		/* some quirks for attributes here */
 
@@ -725,6 +728,7 @@ int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no)
 		dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint);
 		err = snd_usb_add_audio_stream(chip, stream, fp);
 		if (err < 0) {
+			list_del(&fp->list); /* unlink for avoiding double-free */
 			kfree(fp->rate_table);
 			kfree(fp->chmap);
 			kfree(fp);

From fb243c3d81a5d8b5a275c4e118bea481df5d0510 Mon Sep 17 00:00:00 2001
From: Dmitry Tunin <hanipouspilot@gmail.com>
Date: Wed, 10 Feb 2016 15:33:17 +0300
Subject: [PATCH 170/797] Bluetooth: btusb: Add new AR3012 ID 13d3:3395

commit 609574eb46335cfac1421a07c0505627cbbab1f0 upstream.

T: Bus=03 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=12 MxCh= 0
D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1
P: Vendor=13d3 ProdID=3395 Rev=00.01
C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb

BugLink: https://bugs.launchpad.net/bugs/1542564

Reported-and-tested-by: Christopher Simerly <kilikopela29@gmail.com>
Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/ath3k.c | 2 ++
 drivers/bluetooth/btusb.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index fa893c3ec408..cb1cb9acc1b2 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -113,6 +113,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3362) },
 	{ USB_DEVICE(0x13d3, 0x3375) },
 	{ USB_DEVICE(0x13d3, 0x3393) },
+	{ USB_DEVICE(0x13d3, 0x3395) },
 	{ USB_DEVICE(0x13d3, 0x3402) },
 	{ USB_DEVICE(0x13d3, 0x3408) },
 	{ USB_DEVICE(0x13d3, 0x3423) },
@@ -175,6 +176,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3395), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 968897108c76..d4888ea28b91 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -227,6 +227,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3395), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 },

From 36a7f23fc74cf3f3947ea837310b1140e9592f6d Mon Sep 17 00:00:00 2001
From: Dmitry Tunin <hanipouspilot@gmail.com>
Date: Sun, 28 Feb 2016 11:04:06 +0300
Subject: [PATCH 171/797] Bluetooth: btusb: Add a new AR3012 ID 04ca:3014

commit 81d90442eac779938217c3444b240aa51fd3db47 upstream.

T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=03 Dev#= 5 Spd=12 MxCh= 0
D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1
P: Vendor=04ca ProdID=3014 Rev=00.02
C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb

BugLink: https://bugs.launchpad.net/bugs/1546694

Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/ath3k.c | 2 ++
 drivers/bluetooth/btusb.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index cb1cb9acc1b2..3062a3ae1999 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -92,6 +92,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x04CA, 0x300d) },
 	{ USB_DEVICE(0x04CA, 0x300f) },
 	{ USB_DEVICE(0x04CA, 0x3010) },
+	{ USB_DEVICE(0x04CA, 0x3014) },
 	{ USB_DEVICE(0x0930, 0x0219) },
 	{ USB_DEVICE(0x0930, 0x021c) },
 	{ USB_DEVICE(0x0930, 0x0220) },
@@ -155,6 +156,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index d4888ea28b91..b4dd9400c4fc 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -206,6 +206,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },

From 28dee875257cf0985de5ddea4053d968fe365b44 Mon Sep 17 00:00:00 2001
From: Dmitry Tunin <hanipouspilot@gmail.com>
Date: Fri, 4 Mar 2016 01:32:19 +0300
Subject: [PATCH 172/797] Bluetooth: btusb: Add a new AR3012 ID 13d3:3472

commit 75c6aca4765dbe3d0c1507ab5052f2e373dc2331 upstream.

T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=01 Dev#= 4 Spd=12 MxCh= 0
D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1
P: Vendor=13d3 ProdID=3472 Rev=00.01
C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb

BugLink: https://bugs.launchpad.net/bugs/1552925

Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/ath3k.c | 2 ++
 drivers/bluetooth/btusb.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 3062a3ae1999..0c4a748fef7a 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -119,6 +119,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3408) },
 	{ USB_DEVICE(0x13d3, 0x3423) },
 	{ USB_DEVICE(0x13d3, 0x3432) },
+	{ USB_DEVICE(0x13d3, 0x3472) },
 	{ USB_DEVICE(0x13d3, 0x3474) },
 
 	/* Atheros AR5BBU12 with sflash firmware */
@@ -183,6 +184,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU22 with sflash firmware */
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index b4dd9400c4fc..342ec8d203e3 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -233,6 +233,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU12 with sflash firmware */

From 120e2febfc11ea91e34bec1c92fe5d6475c89508 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 12 Jan 2016 11:17:38 -0600
Subject: [PATCH 173/797] crypto: ccp - Add hash state import and export
 support

commit 952bce9792e6bf36fda09c2e5718abb5d9327369 upstream.

Commit 8996eafdcbad ("crypto: ahash - ensure statesize is non-zero")
added a check to prevent ahash algorithms from successfully registering
if the import and export functions were not implemented. This prevents
an oops in the hash_accept function of algif_hash. This commit prevents
the ccp-crypto module SHA support and AES CMAC support from successfully
registering, causing the ccp-crypto module load to fail because the
ahash import and export functions are not implemented.

Update the CCP Crypto API support to provide import and export support
for ahash algorithms.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 23 +++++++++++++++++++++++
 drivers/crypto/ccp/ccp-crypto-sha.c      | 23 +++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index d89f20c04266..00207cf5c79b 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -220,6 +220,26 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
 	return ccp_aes_cmac_finup(req);
 }
 
+static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
+{
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_req_ctx *state = out;
+
+	*state = *rctx;
+
+	return 0;
+}
+
+static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
+{
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	const struct ccp_aes_cmac_req_ctx *state = in;
+
+	*rctx = *state;
+
+	return 0;
+}
+
 static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
 			       unsigned int key_len)
 {
@@ -352,10 +372,13 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
 	alg->final = ccp_aes_cmac_final;
 	alg->finup = ccp_aes_cmac_finup;
 	alg->digest = ccp_aes_cmac_digest;
+	alg->export = ccp_aes_cmac_export;
+	alg->import = ccp_aes_cmac_import;
 	alg->setkey = ccp_aes_cmac_setkey;
 
 	halg = &alg->halg;
 	halg->digestsize = AES_BLOCK_SIZE;
+	halg->statesize = sizeof(struct ccp_aes_cmac_req_ctx);
 
 	base = &halg->base;
 	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)");
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index d14b3f28e010..3aae58def106 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -207,6 +207,26 @@ static int ccp_sha_digest(struct ahash_request *req)
 	return ccp_sha_finup(req);
 }
 
+static int ccp_sha_export(struct ahash_request *req, void *out)
+{
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_sha_req_ctx *state = out;
+
+	*state = *rctx;
+
+	return 0;
+}
+
+static int ccp_sha_import(struct ahash_request *req, const void *in)
+{
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	const struct ccp_sha_req_ctx *state = in;
+
+	*rctx = *state;
+
+	return 0;
+}
+
 static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 			  unsigned int key_len)
 {
@@ -403,9 +423,12 @@ static int ccp_register_sha_alg(struct list_head *head,
 	alg->final = ccp_sha_final;
 	alg->finup = ccp_sha_finup;
 	alg->digest = ccp_sha_digest;
+	alg->export = ccp_sha_export;
+	alg->import = ccp_sha_import;
 
 	halg = &alg->halg;
 	halg->digestsize = def->digest_size;
+	halg->statesize = sizeof(struct ccp_sha_req_ctx);
 
 	base = &halg->base;
 	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);

From 8c5156ad2da4493e6402bf7335d044445dca8c00 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri, 29 Jan 2016 12:45:14 -0600
Subject: [PATCH 174/797] crypto: ccp - Limit the amount of information
 exported

commit d1662165ae612ec8b5f94a6b07e65ea58b6dce34 upstream.

Since the exported information can be exposed to user-space, instead of
exporting the entire request context only export the minimum information
needed.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 16 +++++++++++-----
 drivers/crypto/ccp/ccp-crypto-sha.c      | 20 +++++++++++++++-----
 drivers/crypto/ccp/ccp-crypto.h          | 22 ++++++++++++++++++++++
 3 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 00207cf5c79b..6a2d836eb2d9 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -223,9 +223,12 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
 static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 {
 	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
-	struct ccp_aes_cmac_req_ctx *state = out;
+	struct ccp_aes_cmac_exp_ctx *state = out;
 
-	*state = *rctx;
+	state->null_msg = rctx->null_msg;
+	memcpy(state->iv, rctx->iv, sizeof(state->iv));
+	state->buf_count = rctx->buf_count;
+	memcpy(state->buf, rctx->buf, sizeof(state->buf));
 
 	return 0;
 }
@@ -233,9 +236,12 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
 {
 	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
-	const struct ccp_aes_cmac_req_ctx *state = in;
+	const struct ccp_aes_cmac_exp_ctx *state = in;
 
-	*rctx = *state;
+	rctx->null_msg = state->null_msg;
+	memcpy(rctx->iv, state->iv, sizeof(rctx->iv));
+	rctx->buf_count = state->buf_count;
+	memcpy(rctx->buf, state->buf, sizeof(rctx->buf));
 
 	return 0;
 }
@@ -378,7 +384,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
 
 	halg = &alg->halg;
 	halg->digestsize = AES_BLOCK_SIZE;
-	halg->statesize = sizeof(struct ccp_aes_cmac_req_ctx);
+	halg->statesize = sizeof(struct ccp_aes_cmac_exp_ctx);
 
 	base = &halg->base;
 	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)");
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 3aae58def106..a67128a7af23 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -210,9 +210,14 @@ static int ccp_sha_digest(struct ahash_request *req)
 static int ccp_sha_export(struct ahash_request *req, void *out)
 {
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
-	struct ccp_sha_req_ctx *state = out;
+	struct ccp_sha_exp_ctx *state = out;
 
-	*state = *rctx;
+	state->type = rctx->type;
+	state->msg_bits = rctx->msg_bits;
+	state->first = rctx->first;
+	memcpy(state->ctx, rctx->ctx, sizeof(state->ctx));
+	state->buf_count = rctx->buf_count;
+	memcpy(state->buf, rctx->buf, sizeof(state->buf));
 
 	return 0;
 }
@@ -220,9 +225,14 @@ static int ccp_sha_export(struct ahash_request *req, void *out)
 static int ccp_sha_import(struct ahash_request *req, const void *in)
 {
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
-	const struct ccp_sha_req_ctx *state = in;
+	const struct ccp_sha_exp_ctx *state = in;
 
-	*rctx = *state;
+	rctx->type = state->type;
+	rctx->msg_bits = state->msg_bits;
+	rctx->first = state->first;
+	memcpy(rctx->ctx, state->ctx, sizeof(rctx->ctx));
+	rctx->buf_count = state->buf_count;
+	memcpy(rctx->buf, state->buf, sizeof(rctx->buf));
 
 	return 0;
 }
@@ -428,7 +438,7 @@ static int ccp_register_sha_alg(struct list_head *head,
 
 	halg = &alg->halg;
 	halg->digestsize = def->digest_size;
-	halg->statesize = sizeof(struct ccp_sha_req_ctx);
+	halg->statesize = sizeof(struct ccp_sha_exp_ctx);
 
 	base = &halg->base;
 	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 76a96f0f44c6..a326ec20bfa8 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -129,6 +129,15 @@ struct ccp_aes_cmac_req_ctx {
 	struct ccp_cmd cmd;
 };
 
+struct ccp_aes_cmac_exp_ctx {
+	unsigned int null_msg;
+
+	u8 iv[AES_BLOCK_SIZE];
+
+	unsigned int buf_count;
+	u8 buf[AES_BLOCK_SIZE];
+};
+
 /***** SHA related defines *****/
 #define MAX_SHA_CONTEXT_SIZE	SHA256_DIGEST_SIZE
 #define MAX_SHA_BLOCK_SIZE	SHA256_BLOCK_SIZE
@@ -171,6 +180,19 @@ struct ccp_sha_req_ctx {
 	struct ccp_cmd cmd;
 };
 
+struct ccp_sha_exp_ctx {
+	enum ccp_sha_type type;
+
+	u64 msg_bits;
+
+	unsigned int first;
+
+	u8 ctx[MAX_SHA_CONTEXT_SIZE];
+
+	unsigned int buf_count;
+	u8 buf[MAX_SHA_BLOCK_SIZE];
+};
+
 /***** Common Context Structure *****/
 struct ccp_ctx {
 	int (*complete)(struct crypto_async_request *req, int ret);

From cc78d091bd92300de98b1b372da7d5ee9dcd8e63 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 2 Feb 2016 11:38:21 -0600
Subject: [PATCH 175/797] crypto: ccp - Don't assume export/import areas are
 aligned

commit b31dde2a5cb1bf764282abf934266b7193c2bc7c upstream.

Use a local variable for the exported and imported state so that
alignment is not an issue. On export, set a local variable from the
request context and then memcpy the contents of the local variable to
the export memory area. On import, memcpy the import memory area into
a local variable and then use the local variable to set the request
context.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 26 +++++++++++-------
 drivers/crypto/ccp/ccp-crypto-sha.c      | 34 ++++++++++++++----------
 2 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 6a2d836eb2d9..d095452b8828 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -223,12 +223,15 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
 static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 {
 	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
-	struct ccp_aes_cmac_exp_ctx *state = out;
+	struct ccp_aes_cmac_exp_ctx state;
 
-	state->null_msg = rctx->null_msg;
-	memcpy(state->iv, rctx->iv, sizeof(state->iv));
-	state->buf_count = rctx->buf_count;
-	memcpy(state->buf, rctx->buf, sizeof(state->buf));
+	state.null_msg = rctx->null_msg;
+	memcpy(state.iv, rctx->iv, sizeof(state.iv));
+	state.buf_count = rctx->buf_count;
+	memcpy(state.buf, rctx->buf, sizeof(state.buf));
+
+	/* 'out' may not be aligned so memcpy from local variable */
+	memcpy(out, &state, sizeof(state));
 
 	return 0;
 }
@@ -236,12 +239,15 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
 {
 	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
-	const struct ccp_aes_cmac_exp_ctx *state = in;
+	struct ccp_aes_cmac_exp_ctx state;
 
-	rctx->null_msg = state->null_msg;
-	memcpy(rctx->iv, state->iv, sizeof(rctx->iv));
-	rctx->buf_count = state->buf_count;
-	memcpy(rctx->buf, state->buf, sizeof(rctx->buf));
+	/* 'in' may not be aligned so memcpy to local variable */
+	memcpy(&state, in, sizeof(state));
+
+	rctx->null_msg = state.null_msg;
+	memcpy(rctx->iv, state.iv, sizeof(rctx->iv));
+	rctx->buf_count = state.buf_count;
+	memcpy(rctx->buf, state.buf, sizeof(rctx->buf));
 
 	return 0;
 }
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index a67128a7af23..7002c6b283e5 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -210,14 +210,17 @@ static int ccp_sha_digest(struct ahash_request *req)
 static int ccp_sha_export(struct ahash_request *req, void *out)
 {
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
-	struct ccp_sha_exp_ctx *state = out;
+	struct ccp_sha_exp_ctx state;
 
-	state->type = rctx->type;
-	state->msg_bits = rctx->msg_bits;
-	state->first = rctx->first;
-	memcpy(state->ctx, rctx->ctx, sizeof(state->ctx));
-	state->buf_count = rctx->buf_count;
-	memcpy(state->buf, rctx->buf, sizeof(state->buf));
+	state.type = rctx->type;
+	state.msg_bits = rctx->msg_bits;
+	state.first = rctx->first;
+	memcpy(state.ctx, rctx->ctx, sizeof(state.ctx));
+	state.buf_count = rctx->buf_count;
+	memcpy(state.buf, rctx->buf, sizeof(state.buf));
+
+	/* 'out' may not be aligned so memcpy from local variable */
+	memcpy(out, &state, sizeof(state));
 
 	return 0;
 }
@@ -225,14 +228,17 @@ static int ccp_sha_export(struct ahash_request *req, void *out)
 static int ccp_sha_import(struct ahash_request *req, const void *in)
 {
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
-	const struct ccp_sha_exp_ctx *state = in;
+	struct ccp_sha_exp_ctx state;
 
-	rctx->type = state->type;
-	rctx->msg_bits = state->msg_bits;
-	rctx->first = state->first;
-	memcpy(rctx->ctx, state->ctx, sizeof(rctx->ctx));
-	rctx->buf_count = state->buf_count;
-	memcpy(rctx->buf, state->buf, sizeof(rctx->buf));
+	/* 'in' may not be aligned so memcpy to local variable */
+	memcpy(&state, in, sizeof(state));
+
+	rctx->type = state.type;
+	rctx->msg_bits = state.msg_bits;
+	rctx->first = state.first;
+	memcpy(rctx->ctx, state.ctx, sizeof(rctx->ctx));
+	rctx->buf_count = state.buf_count;
+	memcpy(rctx->buf, state.buf, sizeof(rctx->buf));
 
 	return 0;
 }

From 0cdc91f539d77f4be720330f577b708a56cc9391 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Thu, 25 Feb 2016 16:48:13 -0600
Subject: [PATCH 176/797] crypto: ccp - memset request context to zero during
 import

commit ce0ae266feaf35930394bd770c69778e4ef03ba9 upstream.

Since a crypto_ahash_import() can be called against a request context
that has not had a crypto_ahash_init() performed, the request context
needs to be cleared to ensure there is no random data present. If not,
the random data can result in a kernel oops during crypto_ahash_update().

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 1 +
 drivers/crypto/ccp/ccp-crypto-sha.c      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index d095452b8828..3d9acc53d247 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -244,6 +244,7 @@ static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
 	/* 'in' may not be aligned so memcpy to local variable */
 	memcpy(&state, in, sizeof(state));
 
+	memset(rctx, 0, sizeof(*rctx));
 	rctx->null_msg = state.null_msg;
 	memcpy(rctx->iv, state.iv, sizeof(rctx->iv));
 	rctx->buf_count = state.buf_count;
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 7002c6b283e5..8ef06fad8b14 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -233,6 +233,7 @@ static int ccp_sha_import(struct ahash_request *req, const void *in)
 	/* 'in' may not be aligned so memcpy to local variable */
 	memcpy(&state, in, sizeof(state));
 
+	memset(rctx, 0, sizeof(*rctx));
 	rctx->type = state.type;
 	rctx->msg_bits = state.msg_bits;
 	rctx->first = state.first;

From f69c1b51f6da629e6f03b336ffec8c31b56e6f8a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 30 Jan 2016 17:38:28 +0300
Subject: [PATCH 177/797] crypto: keywrap - memzero the correct memory

commit 2b8b28fd232233c22fb61009dd8b0587390d2875 upstream.

We're clearing the wrong memory.  The memory corruption is likely
harmless because we weren't going to use that stack memory again but not
zeroing is a potential information leak.

Fixes: e28facde3c39 ('crypto: keywrap - add key wrapping block chaining mode')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/keywrap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/keywrap.c b/crypto/keywrap.c
index b1d106ce55f3..72014f963ba7 100644
--- a/crypto/keywrap.c
+++ b/crypto/keywrap.c
@@ -212,7 +212,7 @@ static int crypto_kw_decrypt(struct blkcipher_desc *desc,
 			  SEMIBSIZE))
 		ret = -EBADMSG;
 
-	memzero_explicit(&block, sizeof(struct crypto_kw_block));
+	memzero_explicit(block, sizeof(struct crypto_kw_block));
 
 	return ret;
 }
@@ -297,7 +297,7 @@ static int crypto_kw_encrypt(struct blkcipher_desc *desc,
 	/* establish the IV for the caller to pick up */
 	memcpy(desc->info, block->A, SEMIBSIZE);
 
-	memzero_explicit(&block, sizeof(struct crypto_kw_block));
+	memzero_explicit(block, sizeof(struct crypto_kw_block));
 
 	return 0;
 }

From 90933f3fb612574e326f9872c5bd2121f8e2da39 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Sun, 6 Mar 2016 03:21:52 +0200
Subject: [PATCH 178/797] crypto: atmel - fix checks of error code returned by
 devm_ioremap_resource()

commit 9b52d55f4f0e2bb9a34abbcf99e05e17f1b3b281 upstream.

The change fixes a potential oops while accessing iomem at an invalid
address if devm_ioremap_resource() fails for some reason.

The devm_ioremap_resource() function returns ERR_PTR() and never
returns NULL, which makes the subsequent check for NULL useless.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Fixes: b0e8b3417a62 ("crypto: atmel - use devm_xxx() managed function")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/atmel-aes.c  | 4 ++--
 drivers/crypto/atmel-sha.c  | 4 ++--
 drivers/crypto/atmel-tdes.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index fb16d812c8f5..1dffb13e5c2f 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -1396,9 +1396,9 @@ static int atmel_aes_probe(struct platform_device *pdev)
 	}
 
 	aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res);
-	if (!aes_dd->io_base) {
+	if (IS_ERR(aes_dd->io_base)) {
 		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
+		err = PTR_ERR(aes_dd->io_base);
 		goto res_err;
 	}
 
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 3178f84d2757..0dadb6332f0e 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1405,9 +1405,9 @@ static int atmel_sha_probe(struct platform_device *pdev)
 	}
 
 	sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res);
-	if (!sha_dd->io_base) {
+	if (IS_ERR(sha_dd->io_base)) {
 		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
+		err = PTR_ERR(sha_dd->io_base);
 		goto res_err;
 	}
 
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 2c7a628d0375..bf467d7be35c 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1417,9 +1417,9 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 	}
 
 	tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res);
-	if (!tdes_dd->io_base) {
+	if (IS_ERR(tdes_dd->io_base)) {
 		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
+		err = PTR_ERR(tdes_dd->io_base);
 		goto res_err;
 	}
 

From 75efb5fe5ce23f72ef8641f593b5829152c6203c Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Sun, 6 Mar 2016 03:22:04 +0200
Subject: [PATCH 179/797] crypto: ux500 - fix checks of error code returned by
 devm_ioremap_resource()

commit b62917a2622ebcb03a500ef20da47be80d8c8951 upstream.

The change fixes a potential oops while accessing iomem at an invalid
address if devm_ioremap_resource() fails for some reason.

The devm_ioremap_resource() function returns ERR_PTR() and never
returns NULL, which makes the subsequent check for NULL useless.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Fixes: 5a4eea2658c93 ("crypto: ux500 - Use devm_xxx() managed function")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ux500/cryp/cryp_core.c | 4 ++--
 drivers/crypto/ux500/hash/hash_core.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 4c243c1ffc7f..790f7cadc1ed 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1440,9 +1440,9 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 
 	device_data->phybase = res->start;
 	device_data->base = devm_ioremap_resource(dev, res);
-	if (!device_data->base) {
+	if (IS_ERR(device_data->base)) {
 		dev_err(dev, "[%s]: ioremap failed!", __func__);
-		ret = -ENOMEM;
+		ret = PTR_ERR(device_data->base);
 		goto out;
 	}
 
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index f47d112041b2..66b1c3313e2e 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1675,9 +1675,9 @@ static int ux500_hash_probe(struct platform_device *pdev)
 
 	device_data->phybase = res->start;
 	device_data->base = devm_ioremap_resource(dev, res);
-	if (!device_data->base) {
+	if (IS_ERR(device_data->base)) {
 		dev_err(dev, "%s: ioremap() failed!\n", __func__);
-		ret = -ENOMEM;
+		ret = PTR_ERR(device_data->base);
 		goto out;
 	}
 	spin_lock_init(&device_data->ctx_lock);

From f08fc4eed81e135c687ac246531a6dbbd236eb14 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Thu, 17 Mar 2016 10:47:10 +0100
Subject: [PATCH 180/797] crypto: marvell/cesa - forward
 devm_ioremap_resource() error code

commit dfe97ad30e8c038261663a18b9e04b8b5bc07bea upstream.

Forward devm_ioremap_resource() error code instead of returning
-ENOMEM.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reported-by: Russell King - ARM Linux <linux@arm.linux.org.uk>
Fixes: f63601fd616a ("crypto: marvell/cesa - add a new driver for Marvell's CESA")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/marvell/cesa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index c0656e7f37b5..80239ae69527 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -420,7 +420,7 @@ static int mv_cesa_probe(struct platform_device *pdev)
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
 	cesa->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(cesa->regs))
-		return -ENOMEM;
+		return PTR_ERR(cesa->regs);
 
 	ret = mv_cesa_dev_dma_init(cesa);
 	if (ret)

From 499f9ff872f8792d3318b0bd5e6533bfe48abf0d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Feb 2016 14:37:15 +0000
Subject: [PATCH 181/797] X.509: Fix leap year handling again

commit ac4cbedfdf55455b4c447f17f0fa027dbf02b2a6 upstream.

There are still a couple of minor issues in the X.509 leap year handling:

 (1) To avoid doing a modulus-by-400 in addition to a modulus-by-100 when
     determining whether the year is a leap year or not, I divided the year
     by 100 after doing the modulus-by-100, thereby letting the compiler do
     one instruction for both, and then did a modulus-by-4.

     Unfortunately, I then passed the now-modified year value to mktime64()
     to construct a time value.

     Since this isn't a fast path and since mktime64() does a bunch of
     divisions, just condense down to "% 400".  It's also easier to read.

 (2) The default month length for any February where the year doesn't
     divide by four exactly is obtained from the month_length[] array where
     the value is 29, not 28.

     This is fixed by altering the table.

Reported-by: Rudolf Polzer <rpolzer@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/x509_cert_parser.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 021d39c0ba75..13c4e5a5fe8c 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -494,7 +494,7 @@ int x509_decode_time(time64_t *_t,  size_t hdrlen,
 		     unsigned char tag,
 		     const unsigned char *value, size_t vlen)
 {
-	static const unsigned char month_lengths[] = { 31, 29, 31, 30, 31, 30,
+	static const unsigned char month_lengths[] = { 31, 28, 31, 30, 31, 30,
 						       31, 31, 30, 31, 30, 31 };
 	const unsigned char *p = value;
 	unsigned year, mon, day, hour, min, sec, mon_len;
@@ -540,9 +540,9 @@ int x509_decode_time(time64_t *_t,  size_t hdrlen,
 		if (year % 4 == 0) {
 			mon_len = 29;
 			if (year % 100 == 0) {
-				year /= 100;
-				if (year % 4 != 0)
-					mon_len = 28;
+				mon_len = 28;
+				if (year % 400 == 0)
+					mon_len = 29;
 			}
 		}
 	}

From fa0ae4f223ab29ab853959b08449b5c08c9cb7a0 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Sun, 7 Feb 2016 23:35:32 +0200
Subject: [PATCH 182/797] mei: bus: check if the device is enabled before data
 transfer

commit 15c13dfcad883a1e76b714480fb27be96247fd82 upstream.

The bus data transfer interface was missing the check if the device is
in enabled state, this may lead to stack corruption during link reset.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 0b05aa938799..1a173d0af694 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -53,6 +53,11 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
 	bus = cl->dev;
 
 	mutex_lock(&bus->device_lock);
+	if (bus->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
 	if (!mei_cl_is_connected(cl)) {
 		rets = -ENODEV;
 		goto out;
@@ -109,6 +114,10 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 	bus = cl->dev;
 
 	mutex_lock(&bus->device_lock);
+	if (bus->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
 
 	cb = mei_cl_read_cb(cl, NULL);
 	if (cb)

From 3e69549b48e39e72c7aad093e0c5e29ab547a9f1 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Fri, 29 Jan 2016 09:47:22 -0800
Subject: [PATCH 183/797] tpm: fix the rollback in tpm_chip_register()

commit 72c91ce8523ae5828fe5e4417ae0aaab53707a08 upstream.

Fixed the rollback and gave more self-documenting names for the
functions.

Fixes: d972b0523f ("tpm: fix call order in tpm-chip.c")
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm-chip.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 45cc39aabeee..1a9dcee8da5a 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -140,7 +140,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(tpmm_chip_alloc);
 
-static int tpm_dev_add_device(struct tpm_chip *chip)
+static int tpm_add_char_device(struct tpm_chip *chip)
 {
 	int rc;
 
@@ -151,7 +151,6 @@ static int tpm_dev_add_device(struct tpm_chip *chip)
 			chip->devname, MAJOR(chip->dev.devt),
 			MINOR(chip->dev.devt), rc);
 
-		device_unregister(&chip->dev);
 		return rc;
 	}
 
@@ -162,13 +161,14 @@ static int tpm_dev_add_device(struct tpm_chip *chip)
 			chip->devname, MAJOR(chip->dev.devt),
 			MINOR(chip->dev.devt), rc);
 
+		cdev_del(&chip->cdev);
 		return rc;
 	}
 
 	return rc;
 }
 
-static void tpm_dev_del_device(struct tpm_chip *chip)
+static void tpm_del_char_device(struct tpm_chip *chip)
 {
 	cdev_del(&chip->cdev);
 	device_unregister(&chip->dev);
@@ -222,7 +222,7 @@ int tpm_chip_register(struct tpm_chip *chip)
 
 	tpm_add_ppi(chip);
 
-	rc = tpm_dev_add_device(chip);
+	rc = tpm_add_char_device(chip);
 	if (rc)
 		goto out_err;
 
@@ -274,6 +274,6 @@ void tpm_chip_unregister(struct tpm_chip *chip)
 		sysfs_remove_link(&chip->pdev->kobj, "ppi");
 
 	tpm1_chip_unregister(chip);
-	tpm_dev_del_device(chip);
+	tpm_del_char_device(chip);
 }
 EXPORT_SYMBOL_GPL(tpm_chip_unregister);

From 160f50a3f40f3aa32ec7c7c9d18b3ffb5bdf12e2 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Thu, 18 Feb 2016 22:11:29 +0200
Subject: [PATCH 184/797] tpm_crb: tpm2_shutdown() must be called before
 tpm_chip_unregister()

commit 99cda8cb4639de81cde785b5bab9bc52e916e594 upstream.

Wrong call order.

Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Fixes: 74d6b3ceaa17
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm_crb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 4bb9727c1047..61e64293b765 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -310,11 +310,11 @@ static int crb_acpi_remove(struct acpi_device *device)
 	struct device *dev = &device->dev;
 	struct tpm_chip *chip = dev_get_drvdata(dev);
 
-	tpm_chip_unregister(chip);
-
 	if (chip->flags & TPM_CHIP_FLAG_TPM2)
 		tpm2_shutdown(chip, TPM2_SU_CLEAR);
 
+	tpm_chip_unregister(chip);
+
 	return 0;
 }
 

From 062c8a4ff40fedf82c1ec177a63b06c41801c2e9 Mon Sep 17 00:00:00 2001
From: Harald Hoyer <harald@redhat.com>
Date: Sat, 6 Feb 2016 15:44:42 +0100
Subject: [PATCH 185/797] tpm_eventlog.c: fix binary_bios_measurements

commit 186d124f07da193a8f47e491af85cb695d415f2f upstream.

The commit 0cc698af36ff ("vTPM: support little endian guests") copied
the event, but without the event data, did an endian conversion on the
size and tried to output the event data from the copied version, which
has only one byte of the data, resulting in garbage event data.

[jarkko.sakkinen@linux.intel.com: fixed minor coding style issues and
 renamed the local variable tempPtr as temp_ptr now that there is an
 excuse to do this.]

Signed-off-by: Harald Hoyer <harald@redhat.com>
Fixes: 0cc698af36ff ("vTPM: support little endian guests")
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm_eventlog.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/char/tpm/tpm_eventlog.c b/drivers/char/tpm/tpm_eventlog.c
index bd72fb04225e..4e6940acf639 100644
--- a/drivers/char/tpm/tpm_eventlog.c
+++ b/drivers/char/tpm/tpm_eventlog.c
@@ -232,7 +232,7 @@ static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v)
 {
 	struct tcpa_event *event = v;
 	struct tcpa_event temp_event;
-	char *tempPtr;
+	char *temp_ptr;
 	int i;
 
 	memcpy(&temp_event, event, sizeof(struct tcpa_event));
@@ -242,10 +242,16 @@ static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v)
 	temp_event.event_type = do_endian_conversion(event->event_type);
 	temp_event.event_size = do_endian_conversion(event->event_size);
 
-	tempPtr = (char *)&temp_event;
+	temp_ptr = (char *) &temp_event;
 
-	for (i = 0; i < sizeof(struct tcpa_event) + temp_event.event_size; i++)
-		seq_putc(m, tempPtr[i]);
+	for (i = 0; i < (sizeof(struct tcpa_event) - 1) ; i++)
+		seq_putc(m, temp_ptr[i]);
+
+	temp_ptr = (char *) v;
+
+	for (i = (sizeof(struct tcpa_event) - 1);
+	     i < (sizeof(struct tcpa_event) + temp_event.event_size); i++)
+		seq_putc(m, temp_ptr[i]);
 
 	return 0;
 

From 158c0029a8861591d42c6ad144d09cbd2c2dfe81 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Mon, 8 Feb 2016 22:31:08 +0200
Subject: [PATCH 186/797] tpm: fix the cleanup of struct tpm_chip

commit 8e0ee3c9faed7ca68807ea45141775856c438ac0 upstream.

If the initialization fails before tpm_chip_register(), put_device()
will not be called, which causes the release callback not to be called.
This patch fixes the issue by adding put_device() to devres list of
the parent device.

Fixes: 313d21eeab ("tpm: device class for tpm")
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm-chip.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 1a9dcee8da5a..252142524ff2 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -136,6 +136,8 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev,
 	chip->cdev.owner = chip->pdev->driver->owner;
 	chip->cdev.kobj.parent = &chip->dev.kobj;
 
+	devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev);
+
 	return chip;
 }
 EXPORT_SYMBOL_GPL(tpmm_chip_alloc);
@@ -171,7 +173,7 @@ static int tpm_add_char_device(struct tpm_chip *chip)
 static void tpm_del_char_device(struct tpm_chip *chip)
 {
 	cdev_del(&chip->cdev);
-	device_unregister(&chip->dev);
+	device_del(&chip->dev);
 }
 
 static int tpm1_chip_register(struct tpm_chip *chip)

From 72c49c697bb63054c7c3a26b90d15fb3a95e30eb Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Sat, 13 Feb 2016 22:41:51 +0200
Subject: [PATCH 187/797] HID: logitech: fix Dual Action gamepad support

commit 5d74325a2201376a95520a4a38a1ce2c65761c49 upstream.

The patch that added Logitech Dual Action gamepad support forgot to
update the special driver list for the device. This caused the logitech
driver not to probe unless kernel module load order was favorable.
Update the special driver list to fix it. Thanks to Simon Wood for the
idea.

Cc: Vitaly Katraew <zawullon@gmail.com>
Fixes: 56d0c8b7c8fb ("HID: add support for Logitech Dual Action gamepads")
Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index c6f7a694f67a..f16155f5b6e2 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1897,6 +1897,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DUAL_ACTION) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },

From 773332f0e2b1b530079c812975833c8c2d59e4d7 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Mon, 14 Mar 2016 15:21:04 -0700
Subject: [PATCH 188/797] HID: i2c-hid: fix OOB write in
 i2c_hid_set_or_send_report()

commit 3b654288b196ceaa156029d9457ccbded0489b98 upstream.

Even though hid_hw_* checks that passed in data_len is less than
HID_MAX_BUFFER_SIZE it is not enough, as i2c-hid does not necessarily
allocate buffers of HID_MAX_BUFFER_SIZE but rather checks all device
reports and selects the largest size. In-kernel users normally just send as
much data as the report needs, so there is no problem, but hidraw users can
do whatever they please:

BUG: KASAN: slab-out-of-bounds in memcpy+0x34/0x54 at addr ffffffc07135ea80
Write of size 4101 by task syz-executor/8747
CPU: 2 PID: 8747 Comm: syz-executor Tainted: G    BU         3.18.0 #37
Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT)
Call trace:
[<ffffffc00020ebcc>] dump_backtrace+0x0/0x258 arch/arm64/kernel/traps.c:83
[<ffffffc00020ee40>] show_stack+0x1c/0x2c arch/arm64/kernel/traps.c:172
[<     inline     >] __dump_stack lib/dump_stack.c:15
[<ffffffc001958114>] dump_stack+0x90/0x140 lib/dump_stack.c:50
[<     inline     >] print_error_description mm/kasan/report.c:97
[<     inline     >] kasan_report_error mm/kasan/report.c:278
[<ffffffc0004597dc>] kasan_report+0x268/0x530 mm/kasan/report.c:305
[<ffffffc0004592e8>] __asan_storeN+0x20/0x150 mm/kasan/kasan.c:718
[<ffffffc0004594e0>] memcpy+0x30/0x54 mm/kasan/kasan.c:299
[<ffffffc001306354>] __i2c_hid_command+0x2b0/0x7b4 drivers/hid/i2c-hid/i2c-hid.c:178
[<     inline     >] i2c_hid_set_or_send_report drivers/hid/i2c-hid/i2c-hid.c:321
[<ffffffc0013079a0>] i2c_hid_output_raw_report.isra.2+0x3d4/0x4b8 drivers/hid/i2c-hid/i2c-hid.c:589
[<ffffffc001307ad8>] i2c_hid_output_report+0x54/0x68 drivers/hid/i2c-hid/i2c-hid.c:602
[<     inline     >] hid_hw_output_report include/linux/hid.h:1039
[<ffffffc0012cc7a0>] hidraw_send_report+0x400/0x414 drivers/hid/hidraw.c:154
[<ffffffc0012cc7f4>] hidraw_write+0x40/0x64 drivers/hid/hidraw.c:177
[<ffffffc0004681dc>] vfs_write+0x1d4/0x3cc fs/read_write.c:534
[<     inline     >] SYSC_pwrite64 fs/read_write.c:627
[<ffffffc000468984>] SyS_pwrite64+0xec/0x144 fs/read_write.c:614
Object at ffffffc07135ea80, in cache kmalloc-512
Object allocated with size 268 bytes.

Let's check data length against the buffer size before attempting to copy
data over.

Reported-by: Alexander Potapenko <glider@google.com>
Signed-off-by: Dmitry Torokhov <dtor@chromium.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/i2c-hid/i2c-hid.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 10bd8e6e4c9c..0b80633bae91 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -282,18 +282,22 @@ static int i2c_hid_set_or_send_report(struct i2c_client *client, u8 reportType,
 	u16 dataRegister = le16_to_cpu(ihid->hdesc.wDataRegister);
 	u16 outputRegister = le16_to_cpu(ihid->hdesc.wOutputRegister);
 	u16 maxOutputLength = le16_to_cpu(ihid->hdesc.wMaxOutputLength);
-
-	/* hid_hw_* already checked that data_len < HID_MAX_BUFFER_SIZE */
-	u16 size =	2			/* size */ +
-			(reportID ? 1 : 0)	/* reportID */ +
-			data_len		/* buf */;
-	int args_len =	(reportID >= 0x0F ? 1 : 0) /* optional third byte */ +
-			2			/* dataRegister */ +
-			size			/* args */;
+	u16 size;
+	int args_len;
 	int index = 0;
 
 	i2c_hid_dbg(ihid, "%s\n", __func__);
 
+	if (data_len > ihid->bufsize)
+		return -EINVAL;
+
+	size =		2			/* size */ +
+			(reportID ? 1 : 0)	/* reportID */ +
+			data_len		/* buf */;
+	args_len =	(reportID >= 0x0F ? 1 : 0) /* optional third byte */ +
+			2			/* dataRegister */ +
+			size			/* args */;
+
 	if (!use_data && maxOutputLength == 0)
 		return -ENOSYS;
 

From cca86656508d12f998e11ee7f71b8a40864af3b4 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Mon, 7 Mar 2016 11:02:38 +0100
Subject: [PATCH 189/797] HID: multitouch: force retrieving of Win8 signature
 blob

commit 45c5c6828214605eaefa6755c47bd1a2c7eb203e upstream.

The Synaptics 0x11e5 over I2C found in the Asus T100-CHI requires
fetching the signature blob to actually start sending events.

With this patch, we should be close enough to the Windows driver which
checks the content of the blob at plugin to validate or not the
touchscreen.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=113481
Fixes: 6d4f5440 ("HID: multitouch: Fetch feature reports on demand for Win8 devices")
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-multitouch.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 2b8ff18d3713..c5ec4f915594 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -396,6 +396,11 @@ static void mt_feature_mapping(struct hid_device *hdev,
 			td->is_buttonpad = true;
 
 		break;
+	case 0xff0000c5:
+		/* Retrieve the Win8 blob once to enable some devices */
+		if (usage->usage_index == 0)
+			mt_get_feature(hdev, field->report);
+		break;
 	}
 }
 

From b2fb06096e2538e131bd9551eb5a70ba42c1b3f7 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 12 Feb 2016 17:10:37 +0100
Subject: [PATCH 190/797] HID: fix hid_ignore_special_drivers module parameter

commit 4392bf333388cabdad5afe5b1500002d7b9c318e upstream.

hid_ignore_special_drivers works fine until hid_scan_report autodetects and
reassigns devices (for hid-multitouch, hid-microsoft and hid-rmi).

Simplify the handling of the parameter: if it is there, use hid-generic, no
matter what, and if not, scan the device or rely on the hid_have_special_driver
table.

This was detected while trying to disable hid-multitouch on a Surface Pro cover
which prevented use of the keyboard.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f16155f5b6e2..ec791e169f8f 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2616,9 +2616,10 @@ int hid_add_device(struct hid_device *hdev)
 	/*
 	 * Scan generic devices for group information
 	 */
-	if (hid_ignore_special_drivers ||
-	    (!hdev->group &&
-	     !hid_match_id(hdev, hid_have_special_driver))) {
+	if (hid_ignore_special_drivers) {
+		hdev->group = HID_GROUP_GENERIC;
+	} else if (!hdev->group &&
+		   !hid_match_id(hdev, hid_have_special_driver)) {
 		ret = hid_scan_report(hdev);
 		if (ret)
 			hid_warn(hdev, "bad device descriptor (%d)\n", ret);

From a5e8deb7f07fb8f7d34c908dea303ba948752916 Mon Sep 17 00:00:00 2001
From: "Spencer E. Olson" <olsonse@umich.edu>
Date: Tue, 12 Jan 2016 10:33:18 -0700
Subject: [PATCH 191/797] staging: comedi: ni_tiocmd: change mistaken use of
 start_src for start_arg

commit 1fd24a4702d2af0ea4d5845126cf57d4d1796216 upstream.

This fixes a bug in function ni_tio_input_inttrig().  The trigger number
should be compared to cmd->start_arg, not cmd->start_src.

Fixes: 6a760394d7eb ("staging: comedi: ni_tiocmd: clarify the cmd->start_arg validation and use")
Signed-off-by: Spencer E. Olson <olsonse@umich.edu>
Reviewed-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/ni_tiocmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/drivers/ni_tiocmd.c b/drivers/staging/comedi/drivers/ni_tiocmd.c
index 437f723bb34d..823e47910004 100644
--- a/drivers/staging/comedi/drivers/ni_tiocmd.c
+++ b/drivers/staging/comedi/drivers/ni_tiocmd.c
@@ -92,7 +92,7 @@ static int ni_tio_input_inttrig(struct comedi_device *dev,
 	unsigned long flags;
 	int ret = 0;
 
-	if (trig_num != cmd->start_src)
+	if (trig_num != cmd->start_arg)
 		return -EINVAL;
 
 	spin_lock_irqsave(&counter->lock, flags);

From 263b0af7cc419a6f5254269a30e9784b5476f433 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Wed, 23 Mar 2016 00:38:43 +0200
Subject: [PATCH 192/797] staging: android: ion_test: fix check of
 platform_device_register_simple() error code

commit ccbc2a9e7878ff09bcaed4893c2a2d3adbb797e2 upstream.

On error platform_device_register_simple() returns ERR_PTR() value,
check for NULL always fails. The change corrects the check itself and
propagates the returned error upwards.

Fixes: 81fb0b901397 ("staging: android: ion_test: unregister the platform device")
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ion/ion_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c
index b8dcf5a26cc4..58d46893e5ff 100644
--- a/drivers/staging/android/ion/ion_test.c
+++ b/drivers/staging/android/ion/ion_test.c
@@ -285,8 +285,8 @@ static int __init ion_test_init(void)
 {
 	ion_test_pdev = platform_device_register_simple("ion-test",
 							-1, NULL, 0);
-	if (!ion_test_pdev)
-		return -ENODEV;
+	if (IS_ERR(ion_test_pdev))
+		return PTR_ERR(ion_test_pdev);
 
 	return platform_driver_probe(&ion_test_platform_driver, ion_test_probe);
 }

From 62fe263236e09a5b4ec64bc71a42d37caf943eca Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hsweeten@visionengravers.com>
Date: Tue, 22 Mar 2016 10:04:48 -0700
Subject: [PATCH 193/797] staging: comedi: ni_mio_common: fix the
 ni_write[blw]() functions

commit bd3a3cd6c27b117fb9a43a38c8072c95332beecc upstream.

Memory mapped io (dev->mmio) should not also be writing to the ioport
(dev->iobase) registers. Add the missing 'else' to these functions.

Fixes: 0953ee4acca0 ("staging: comedi: ni_mio_common: checkpatch.pl cleanup (else not useful)")
Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Reviewed-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/ni_mio_common.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index 6cc304a4c59b..27fbf1a81097 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -246,24 +246,24 @@ static void ni_writel(struct comedi_device *dev, uint32_t data, int reg)
 {
 	if (dev->mmio)
 		writel(data, dev->mmio + reg);
-
-	outl(data, dev->iobase + reg);
+	else
+		outl(data, dev->iobase + reg);
 }
 
 static void ni_writew(struct comedi_device *dev, uint16_t data, int reg)
 {
 	if (dev->mmio)
 		writew(data, dev->mmio + reg);
-
-	outw(data, dev->iobase + reg);
+	else
+		outw(data, dev->iobase + reg);
 }
 
 static void ni_writeb(struct comedi_device *dev, uint8_t data, int reg)
 {
 	if (dev->mmio)
 		writeb(data, dev->mmio + reg);
-
-	outb(data, dev->iobase + reg);
+	else
+		outb(data, dev->iobase + reg);
 }
 
 static uint32_t ni_readl(struct comedi_device *dev, int reg)

From 583aacb1f69b733e6808625fbc44da8f499c9bfb Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 10 Jan 2016 20:36:12 -0800
Subject: [PATCH 194/797] tty: Fix GPF in flush_to_ldisc(), part 2

commit f33798deecbd59a2955f40ac0ae2bc7dff54c069 upstream.

commit 9ce119f318ba ("tty: Fix GPF in flush_to_ldisc()") fixed a
GPF caused by a line discipline which does not define a receive_buf()
method.

However, the vt driver (and speakup driver also) pushes selection
data directly to the line discipline receive_buf() method via
tty_ldisc_receive_buf(). Fix the same problem in tty_ldisc_receive_buf().

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 6b6e811f4575..3bf03b6b52e9 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -594,7 +594,7 @@ static inline int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p,
 		count = ld->ops->receive_buf2(ld->tty, p, f, count);
 	else {
 		count = min_t(int, count, ld->tty->receive_room);
-		if (count)
+		if (count && ld->ops->receive_buf)
 			ld->ops->receive_buf(ld->tty, p, f, count);
 	}
 	return count;

From 456db805f77c45611caa217996dcb93f24afb2e5 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sat, 9 Jan 2016 17:48:45 -0800
Subject: [PATCH 195/797] net: irda: Fix use-after-free in irtty_open()

commit 401879c57f01cbf2da204ad2e8db910525c6dbea upstream.

The N_IRDA line discipline may access the previous line discipline's closed
and already-freed private data on open [1].

The tty->disc_data field _never_ refers to valid data on entry to the
line discipline's open() method. Rather, the ldisc is expected to
initialize that field for its own use for the lifetime of the instance
(ie. from open() to close() only).

[1]
    ==================================================================
    BUG: KASAN: use-after-free in irtty_open+0x422/0x550 at addr ffff8800331dd068
    Read of size 4 by task a.out/13960
    =============================================================================
    BUG kmalloc-512 (Tainted: G    B          ): kasan: bad access detected
    -----------------------------------------------------------------------------
    ...
    Call Trace:
     [<ffffffff815fa2ae>] __asan_report_load4_noabort+0x3e/0x40 mm/kasan/report.c:279
     [<ffffffff836938a2>] irtty_open+0x422/0x550 drivers/net/irda/irtty-sir.c:436
     [<ffffffff829f1b80>] tty_ldisc_open.isra.2+0x60/0xa0 drivers/tty/tty_ldisc.c:447
     [<ffffffff829f21c0>] tty_set_ldisc+0x1a0/0x940 drivers/tty/tty_ldisc.c:567
     [<     inline     >] tiocsetd drivers/tty/tty_io.c:2650
     [<ffffffff829da49e>] tty_ioctl+0xace/0x1fd0 drivers/tty/tty_io.c:2883
     [<     inline     >] vfs_ioctl fs/ioctl.c:43
     [<ffffffff816708ac>] do_vfs_ioctl+0x57c/0xe60 fs/ioctl.c:607
     [<     inline     >] SYSC_ioctl fs/ioctl.c:622
     [<ffffffff81671204>] SyS_ioctl+0x74/0x80 fs/ioctl.c:613
     [<ffffffff852a7876>] entry_SYSCALL_64_fastpath+0x16/0x7a

Reported-and-tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/irda/irtty-sir.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c
index 696852eb23c3..7a3f990c1935 100644
--- a/drivers/net/irda/irtty-sir.c
+++ b/drivers/net/irda/irtty-sir.c
@@ -430,16 +430,6 @@ static int irtty_open(struct tty_struct *tty)
 
 	/* Module stuff handled via irda_ldisc.owner - Jean II */
 
-	/* First make sure we're not already connected. */
-	if (tty->disc_data != NULL) {
-		priv = tty->disc_data;
-		if (priv && priv->magic == IRTTY_MAGIC) {
-			ret = -EEXIST;
-			goto out;
-		}
-		tty->disc_data = NULL;		/* ### */
-	}
-
 	/* stop the underlying  driver */
 	irtty_stop_receiver(tty, TRUE);
 	if (tty->ops->stop)

From fc0768092cebd0b70a08f5423263669ea3849ef9 Mon Sep 17 00:00:00 2001
From: Sebastian Frias <sf84@laposte.net>
Date: Fri, 18 Dec 2015 17:40:05 +0100
Subject: [PATCH 196/797] 8250: use callbacks to access UART_DLL/UART_DLM

commit 0b41ce991052022c030fd868e03877700220b090 upstream.

Some UART HW has a single register combining UART_DLL/UART_DLM
(this was probably forgotten in the change that introduced the
callbacks, commit b32b19b8ffc05cbd3bf91c65e205f6a912ca15d9)

Fixes: b32b19b8ffc0 ("[SERIAL] 8250: set divisor register correctly ...")

Signed-off-by: Sebastian Frias <sf84@laposte.net>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 52d82d2ac726..56ccbcefdd85 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -713,22 +713,16 @@ static int size_fifo(struct uart_8250_port *up)
  */
 static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p)
 {
-	unsigned char old_dll, old_dlm, old_lcr;
-	unsigned int id;
+	unsigned char old_lcr;
+	unsigned int id, old_dl;
 
 	old_lcr = serial_in(p, UART_LCR);
 	serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A);
+	old_dl = serial_dl_read(p);
+	serial_dl_write(p, 0);
+	id = serial_dl_read(p);
+	serial_dl_write(p, old_dl);
 
-	old_dll = serial_in(p, UART_DLL);
-	old_dlm = serial_in(p, UART_DLM);
-
-	serial_out(p, UART_DLL, 0);
-	serial_out(p, UART_DLM, 0);
-
-	id = serial_in(p, UART_DLL) | serial_in(p, UART_DLM) << 8;
-
-	serial_out(p, UART_DLL, old_dll);
-	serial_out(p, UART_DLM, old_dlm);
 	serial_out(p, UART_LCR, old_lcr);
 
 	return id;

From 4fe401d63051c524e45d2a1c57bf1e34ecc19a7b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 14 Feb 2016 17:51:37 -0200
Subject: [PATCH 197/797] saa7134: Fix bytesperline not being set correctly for
 planar formats

commit 3e71da19f9dc22e39a755d6ae9678661abb66adc upstream.

bytesperline should be the bytesperline for the first plane for planar
formats, not that of all planes combined.

This fixes a crash in xawtv caused by the wrong bpl.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1305389
Reported-and-tested-by: Stas Sergeev <stsp@list.ru>

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/saa7134/saa7134-video.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c
index 518086c7aed5..15e56c07b217 100644
--- a/drivers/media/pci/saa7134/saa7134-video.c
+++ b/drivers/media/pci/saa7134/saa7134-video.c
@@ -1219,10 +1219,13 @@ static int saa7134_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.height       = dev->height;
 	f->fmt.pix.field        = dev->field;
 	f->fmt.pix.pixelformat  = dev->fmt->fourcc;
-	f->fmt.pix.bytesperline =
-		(f->fmt.pix.width * dev->fmt->depth) >> 3;
+	if (dev->fmt->planar)
+		f->fmt.pix.bytesperline = f->fmt.pix.width;
+	else
+		f->fmt.pix.bytesperline =
+			(f->fmt.pix.width * dev->fmt->depth) / 8;
 	f->fmt.pix.sizeimage =
-		f->fmt.pix.height * f->fmt.pix.bytesperline;
+		(f->fmt.pix.height * f->fmt.pix.width * dev->fmt->depth) / 8;
 	f->fmt.pix.colorspace   = V4L2_COLORSPACE_SMPTE170M;
 	return 0;
 }
@@ -1298,10 +1301,13 @@ static int saa7134_try_fmt_vid_cap(struct file *file, void *priv,
 	if (f->fmt.pix.height > maxh)
 		f->fmt.pix.height = maxh;
 	f->fmt.pix.width &= ~0x03;
-	f->fmt.pix.bytesperline =
-		(f->fmt.pix.width * fmt->depth) >> 3;
+	if (fmt->planar)
+		f->fmt.pix.bytesperline = f->fmt.pix.width;
+	else
+		f->fmt.pix.bytesperline =
+			(f->fmt.pix.width * fmt->depth) / 8;
 	f->fmt.pix.sizeimage =
-		f->fmt.pix.height * f->fmt.pix.bytesperline;
+		(f->fmt.pix.height * f->fmt.pix.width * fmt->depth) / 8;
 	f->fmt.pix.colorspace   = V4L2_COLORSPACE_SMPTE170M;
 
 	return 0;

From 454b8cbea62141cdaee224a118678c890c7aa186 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 10 Feb 2016 09:32:25 -0200
Subject: [PATCH 198/797] adv7511: TX_EDID_PRESENT is still 1 after a
 disconnect

commit b339a72e04a62f0b1882c43492fc712f1176b3e6 upstream.

The V4L2_CID_TX_EDID_PRESENT control reports if an EDID is present.
The adv7511 however still reported the EDID present after disconnecting
the HDMI cable. Fix the logic regarding this control. And when the EDID
is disconnected also call ADV7511_EDID_DETECT to notify the bridge driver.
This was also missing.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/i2c/adv7511.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c
index e4900df1140b..c24839cfcc35 100644
--- a/drivers/media/i2c/adv7511.c
+++ b/drivers/media/i2c/adv7511.c
@@ -1161,12 +1161,23 @@ static void adv7511_dbg_dump_edid(int lvl, int debug, struct v4l2_subdev *sd, in
 	}
 }
 
+static void adv7511_notify_no_edid(struct v4l2_subdev *sd)
+{
+	struct adv7511_state *state = get_adv7511_state(sd);
+	struct adv7511_edid_detect ed;
+
+	/* We failed to read the EDID, so send an event for this. */
+	ed.present = false;
+	ed.segment = adv7511_rd(sd, 0xc4);
+	v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed);
+	v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, 0x0);
+}
+
 static void adv7511_edid_handler(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct adv7511_state *state = container_of(dwork, struct adv7511_state, edid_handler);
 	struct v4l2_subdev *sd = &state->sd;
-	struct adv7511_edid_detect ed;
 
 	v4l2_dbg(1, debug, sd, "%s:\n", __func__);
 
@@ -1191,9 +1202,7 @@ static void adv7511_edid_handler(struct work_struct *work)
 	}
 
 	/* We failed to read the EDID, so send an event for this. */
-	ed.present = false;
-	ed.segment = adv7511_rd(sd, 0xc4);
-	v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed);
+	adv7511_notify_no_edid(sd);
 	v4l2_dbg(1, debug, sd, "%s: no edid found\n", __func__);
 }
 
@@ -1264,7 +1273,6 @@ static void adv7511_check_monitor_present_status(struct v4l2_subdev *sd)
 	/* update read only ctrls */
 	v4l2_ctrl_s_ctrl(state->hotplug_ctrl, adv7511_have_hotplug(sd) ? 0x1 : 0x0);
 	v4l2_ctrl_s_ctrl(state->rx_sense_ctrl, adv7511_have_rx_sense(sd) ? 0x1 : 0x0);
-	v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, state->edid.segments ? 0x1 : 0x0);
 
 	if ((status & MASK_ADV7511_HPD_DETECT) && ((status & MASK_ADV7511_MSEN_DETECT) || state->edid.segments)) {
 		v4l2_dbg(1, debug, sd, "%s: hotplug and (rx-sense or edid)\n", __func__);
@@ -1294,6 +1302,7 @@ static void adv7511_check_monitor_present_status(struct v4l2_subdev *sd)
 		}
 		adv7511_s_power(sd, false);
 		memset(&state->edid, 0, sizeof(struct adv7511_state_edid));
+		adv7511_notify_no_edid(sd);
 	}
 }
 
@@ -1370,6 +1379,7 @@ static bool adv7511_check_edid_status(struct v4l2_subdev *sd)
 		}
 		/* one more segment read ok */
 		state->edid.segments = segment + 1;
+		v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, 0x1);
 		if (((state->edid.data[0x7e] >> 1) + 1) > state->edid.segments) {
 			/* Request next EDID segment */
 			v4l2_dbg(1, debug, sd, "%s: request segment %d\n", __func__, state->edid.segments);
@@ -1389,7 +1399,6 @@ static bool adv7511_check_edid_status(struct v4l2_subdev *sd)
 		ed.present = true;
 		ed.segment = 0;
 		state->edid_detect_counter++;
-		v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, state->edid.segments ? 0x1 : 0x0);
 		v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed);
 		return ed.present;
 	}

From 3c26bcd82af503a92ae4e10223707f2b51ec3cd9 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 7 Feb 2016 09:24:29 -0200
Subject: [PATCH 199/797] bttv: Width must be a multiple of 16 when capturing
 planar formats

commit 5c915c68763889f0183a1cc61c84bb228b60124a upstream.

On my bttv card "Hauppauge WinTV [card=10]" capturing in YV12 fmt at max
size results in a solid green rectangle being captured (all colors 0 in
YUV).

This turns out to be caused by max-width (924) not being a multiple of 16.

We've likely never hit this problem before since normally xawtv / tvtime,
etc. will prefer packed pixel formats. But when using a video card which
is using xf86-video-modesetting + glamor, only planar XVideo fmts are
available, and xawtv will choose a matching capture format to avoid needing
to do conversion, triggering the solid green window problem.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/bt8xx/bttv-driver.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index 15a4ebc2844d..51dbef2f9a48 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -2334,6 +2334,19 @@ static int bttv_g_fmt_vid_overlay(struct file *file, void *priv,
 	return 0;
 }
 
+static void bttv_get_width_mask_vid_cap(const struct bttv_format *fmt,
+					unsigned int *width_mask,
+					unsigned int *width_bias)
+{
+	if (fmt->flags & FORMAT_FLAGS_PLANAR) {
+		*width_mask = ~15; /* width must be a multiple of 16 pixels */
+		*width_bias = 8;   /* nearest */
+	} else {
+		*width_mask = ~3; /* width must be a multiple of 4 pixels */
+		*width_bias = 2;  /* nearest */
+	}
+}
+
 static int bttv_try_fmt_vid_cap(struct file *file, void *priv,
 						struct v4l2_format *f)
 {
@@ -2343,6 +2356,7 @@ static int bttv_try_fmt_vid_cap(struct file *file, void *priv,
 	enum v4l2_field field;
 	__s32 width, height;
 	__s32 height2;
+	unsigned int width_mask, width_bias;
 	int rc;
 
 	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
@@ -2375,9 +2389,9 @@ static int bttv_try_fmt_vid_cap(struct file *file, void *priv,
 	width = f->fmt.pix.width;
 	height = f->fmt.pix.height;
 
+	bttv_get_width_mask_vid_cap(fmt, &width_mask, &width_bias);
 	rc = limit_scaled_size_lock(fh, &width, &height, field,
-			       /* width_mask: 4 pixels */ ~3,
-			       /* width_bias: nearest */ 2,
+			       width_mask, width_bias,
 			       /* adjust_size */ 1,
 			       /* adjust_crop */ 0);
 	if (0 != rc)
@@ -2410,6 +2424,7 @@ static int bttv_s_fmt_vid_cap(struct file *file, void *priv,
 	struct bttv_fh *fh = priv;
 	struct bttv *btv = fh->btv;
 	__s32 width, height;
+	unsigned int width_mask, width_bias;
 	enum v4l2_field field;
 
 	retval = bttv_switch_type(fh, f->type);
@@ -2424,9 +2439,10 @@ static int bttv_s_fmt_vid_cap(struct file *file, void *priv,
 	height = f->fmt.pix.height;
 	field = f->fmt.pix.field;
 
+	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
+	bttv_get_width_mask_vid_cap(fmt, &width_mask, &width_bias);
 	retval = limit_scaled_size_lock(fh, &width, &height, f->fmt.pix.field,
-			       /* width_mask: 4 pixels */ ~3,
-			       /* width_bias: nearest */ 2,
+			       width_mask, width_bias,
 			       /* adjust_size */ 1,
 			       /* adjust_crop */ 1);
 	if (0 != retval)
@@ -2434,8 +2450,6 @@ static int bttv_s_fmt_vid_cap(struct file *file, void *priv,
 
 	f->fmt.pix.field = field;
 
-	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
-
 	/* update our state informations */
 	fh->fmt              = fmt;
 	fh->cap.field        = f->fmt.pix.field;

From 6aed423a0e54306003228d3b68196c542af6cd59 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 4 Jan 2016 17:30:09 -0200
Subject: [PATCH 200/797] coda: fix first encoded frame payload

commit 74dc385cb450089b28c28be2c8a0baca296b95f9 upstream.

During the recent vb2_buffer restructuring, the calculation of the
buffer payload reported to userspace was accidentally broken for the
first encoded frame, counting only the length of the headers.
This patch re-adds the length of the actual frame data.

Fixes: 2d7007153f0c ("[media] media: videobuf2: Restructure vb2_buffer")

Reported-by: Michael Olbrich <m.olbrich@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Tested-by: Jan Luebbe <jlu@pengutronix.de>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/coda/coda-bit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c
index 654e964f84a2..d76511c1c1e3 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -1342,7 +1342,7 @@ static void coda_finish_encode(struct coda_ctx *ctx)
 
 	/* Calculate bytesused field */
 	if (dst_buf->sequence == 0) {
-		vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
+		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
 					ctx->vpu_header_size[0] +
 					ctx->vpu_header_size[1] +
 					ctx->vpu_header_size[2]);

From 8c1fa99764bd76be5707447b40aa0806acfc61bf Mon Sep 17 00:00:00 2001
From: Tiffany Lin <tiffany.lin@mediatek.com>
Date: Tue, 19 Jan 2016 05:56:50 -0200
Subject: [PATCH 201/797] media: v4l2-compat-ioctl32: fix missing length copy
 in put_v4l2_buffer32

commit 7df5ab8774aa383c6d2bff00688d004585d96dfd upstream.

In the v4l2-compliance utility, the QUERYBUF test requires a correct length
value in order to go through each plane and check that plane's length for
multi-planar buffer types.

Signed-off-by: Tiffany Lin <tiffany.lin@mediatek.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 327e83ac2469..f38c076752ce 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -415,7 +415,8 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user
 		get_user(kp->index, &up->index) ||
 		get_user(kp->type, &up->type) ||
 		get_user(kp->flags, &up->flags) ||
-		get_user(kp->memory, &up->memory))
+		get_user(kp->memory, &up->memory) ||
+		get_user(kp->length, &up->length))
 			return -EFAULT;
 
 	if (V4L2_TYPE_IS_OUTPUT(kp->type))
@@ -427,9 +428,6 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user
 			return -EFAULT;
 
 	if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) {
-		if (get_user(kp->length, &up->length))
-			return -EFAULT;
-
 		num_planes = kp->length;
 		if (num_planes == 0) {
 			kp->m.planes = NULL;
@@ -462,16 +460,14 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user
 	} else {
 		switch (kp->memory) {
 		case V4L2_MEMORY_MMAP:
-			if (get_user(kp->length, &up->length) ||
-				get_user(kp->m.offset, &up->m.offset))
+			if (get_user(kp->m.offset, &up->m.offset))
 				return -EFAULT;
 			break;
 		case V4L2_MEMORY_USERPTR:
 			{
 			compat_long_t tmp;
 
-			if (get_user(kp->length, &up->length) ||
-			    get_user(tmp, &up->m.userptr))
+			if (get_user(tmp, &up->m.userptr))
 				return -EFAULT;
 
 			kp->m.userptr = (unsigned long)compat_ptr(tmp);
@@ -513,7 +509,8 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user
 		copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) ||
 		put_user(kp->sequence, &up->sequence) ||
 		put_user(kp->reserved2, &up->reserved2) ||
-		put_user(kp->reserved, &up->reserved))
+		put_user(kp->reserved, &up->reserved) ||
+		put_user(kp->length, &up->length))
 			return -EFAULT;
 
 	if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) {
@@ -536,13 +533,11 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user
 	} else {
 		switch (kp->memory) {
 		case V4L2_MEMORY_MMAP:
-			if (put_user(kp->length, &up->length) ||
-				put_user(kp->m.offset, &up->m.offset))
+			if (put_user(kp->m.offset, &up->m.offset))
 				return -EFAULT;
 			break;
 		case V4L2_MEMORY_USERPTR:
-			if (put_user(kp->length, &up->length) ||
-				put_user(kp->m.userptr, &up->m.userptr))
+			if (put_user(kp->m.userptr, &up->m.userptr))
 				return -EFAULT;
 			break;
 		case V4L2_MEMORY_OVERLAY:

From ef87aef51d0de4220bf9c4de34de89f2716b3398 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:17:32 -0800
Subject: [PATCH 202/797] mtip32xx: Avoid issuing standby immediate cmd during
 FTL rebuild

commit d8a18d2d8f5de55666c6011ed175939d22c8e3d8 upstream.

Prevent standby immediate command from being issued in remove,
suspend and shutdown paths, while drive is in FTL rebuild process.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Vignesh Gunasekaran <vgunasekaran@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3457ac8c03e2..deb0c761d5db 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3270,20 +3270,25 @@ static int mtip_hw_init(struct driver_data *dd)
 	return rv;
 }
 
-static void mtip_standby_drive(struct driver_data *dd)
+static int mtip_standby_drive(struct driver_data *dd)
 {
-	if (dd->sr)
-		return;
+	int rv = 0;
 
+	if (dd->sr || !dd->port)
+		return -ENODEV;
 	/*
 	 * Send standby immediate (E0h) to the drive so that it
 	 * saves its state.
 	 */
 	if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
-	    !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
-		if (mtip_standby_immediate(dd->port))
+	    !test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag) &&
+	    !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) {
+		rv = mtip_standby_immediate(dd->port);
+		if (rv)
 			dev_warn(&dd->pdev->dev,
 				"STANDBY IMMEDIATE failed\n");
+	}
+	return rv;
 }
 
 /*
@@ -3341,8 +3346,7 @@ static int mtip_hw_shutdown(struct driver_data *dd)
 	 * Send standby immediate (E0h) to the drive so that it
 	 * saves its state.
 	 */
-	if (!dd->sr && dd->port)
-		mtip_standby_immediate(dd->port);
+	mtip_standby_drive(dd);
 
 	return 0;
 }
@@ -3365,7 +3369,7 @@ static int mtip_hw_suspend(struct driver_data *dd)
 	 * Send standby immediate (E0h) to the drive
 	 * so that it saves its state.
 	 */
-	if (mtip_standby_immediate(dd->port) != 0) {
+	if (mtip_standby_drive(dd) != 0) {
 		dev_err(&dd->pdev->dev,
 			"Failed standby-immediate command\n");
 		return -EFAULT;

From 1b899eb4833d3394f37272d38b4b1a26eac30feb Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:16:00 -0800
Subject: [PATCH 203/797] mtip32xx: Fix broken service thread handling

commit cfc05bd31384c4898bf2437a4de5557f3cf9803a upstream.

Service thread does not detect the need for taskfile error handling. Fixed the
flag condition to process taskfile error.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 6 +++---
 drivers/block/mtip32xx/mtip32xx.h | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index deb0c761d5db..de4d965139ed 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2924,9 +2924,7 @@ static int mtip_service_thread(void *data)
 		 * is in progress nor error handling is active
 		 */
 		wait_event_interruptible(port->svc_wait, (port->flags) &&
-			!(port->flags & MTIP_PF_PAUSE_IO));
-
-		set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
+			(port->flags & MTIP_PF_SVC_THD_WORK));
 
 		if (kthread_should_stop() ||
 			test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
@@ -2936,6 +2934,8 @@ static int mtip_service_thread(void *data)
 				&dd->dd_flag)))
 			goto st_out;
 
+		set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
+
 restart_eh:
 		/* Demux bits: start with error handling */
 		if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) {
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 3274784008eb..8635239c521f 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -144,6 +144,11 @@ enum {
 	MTIP_PF_REBUILD_BIT         = 6,
 	MTIP_PF_SVC_THD_STOP_BIT    = 8,
 
+	MTIP_PF_SVC_THD_WORK	= ((1 << MTIP_PF_EH_ACTIVE_BIT) |
+				  (1 << MTIP_PF_ISSUE_CMDS_BIT) |
+				  (1 << MTIP_PF_REBUILD_BIT) |
+				  (1 << MTIP_PF_SVC_THD_STOP_BIT)),
+
 	/* below are bit numbers in 'dd_flag' defined in driver_data */
 	MTIP_DDF_SEC_LOCK_BIT	    = 0,
 	MTIP_DDF_REMOVE_PENDING_BIT = 1,

From 828e9f2e8aa20881e1fc46152590d09520161ef8 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:16:21 -0800
Subject: [PATCH 204/797] mtip32xx: Remove unwanted code from taskfile error
 handler

commit e35b94738a2f7caa12017f69ef385cb6b8028965 upstream.

Remove setting and clearing MTIP_PF_EH_ACTIVE_BIT flag in
mtip_handle_tfe() as they are redundant. Also avoid waking
up service thread from mtip_handle_tfe() because it is
already woken up in case of taskfile error.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Rajesh Kumar Sambandam <rsambandam@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index de4d965139ed..a9cc83d6e185 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -618,8 +618,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
 
 	port = dd->port;
 
-	set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
-
 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
@@ -628,7 +626,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			cmd->comp_func(port, MTIP_TAG_INTERNAL,
 					cmd, PORT_IRQ_TF_ERR);
 		}
-		goto handle_tfe_exit;
+		return;
 	}
 
 	/* clear the tag accumulator */
@@ -771,11 +769,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
 		}
 	}
 	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
-
-handle_tfe_exit:
-	/* clear eh_active */
-	clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
-	wake_up_interruptible(&port->svc_wait);
 }
 
 /*

From 3d58a02e4fdad9cb7ce0799363eaa54c012fcd33 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:16:38 -0800
Subject: [PATCH 205/797] mtip32xx: Print exact time when an internal command
 is interrupted

commit 5b7e0a8ac85e2dfd83830dc9e0b3554d153a37e3 upstream.

Print exact time when an internal command is interrupted.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Rajesh Kumar Sambandam <rsambandam@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a9cc83d6e185..27feff9a5604 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1092,6 +1092,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	struct mtip_cmd *int_cmd;
 	struct driver_data *dd = port->dd;
 	int rv = 0;
+	unsigned long start;
 
 	/* Make sure the buffer is 8 byte aligned. This is asic specific. */
 	if (buffer & 0x00000007) {
@@ -1155,6 +1156,8 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	/* Populate the command header */
 	int_cmd->command_header->byte_count = 0;
 
+	start = jiffies;
+
 	/* Issue the command to the hardware */
 	mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
 
@@ -1165,8 +1168,9 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 				msecs_to_jiffies(timeout))) <= 0) {
 			if (rv == -ERESTARTSYS) { /* interrupted */
 				dev_err(&dd->pdev->dev,
-					"Internal command [%02X] was interrupted after %lu ms\n",
-					fis->command, timeout);
+					"Internal command [%02X] was interrupted after %u ms\n",
+					fis->command,
+					jiffies_to_msecs(jiffies - start));
 				rv = -EINTR;
 				goto exec_ic_exit;
 			} else if (rv == 0) /* timeout */

From e241a8dab98aff4d79d36e8cc71c4487c909bdd5 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:17:47 -0800
Subject: [PATCH 206/797] mtip32xx: Fix for rmmod crash when drive is in FTL
 rebuild

commit 59cf70e236c96594d9f1e065755d8fce9df5356b upstream.

When FTL rebuild is in progress, alloc_disk() initializes the disk
but device node will be created by add_disk() only after successful
completion of FTL rebuild. So, skip deletion of device node in
removal path when FTL rebuild is in progress.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 27feff9a5604..47d96bdf5f9e 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2975,10 +2975,8 @@ static int mtip_service_thread(void *data)
 		}
 
 		if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
-			if (mtip_ftl_rebuild_poll(dd) < 0)
-				set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
-							&dd->dd_flag);
-			clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
+			if (mtip_ftl_rebuild_poll(dd) == 0)
+				clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
 		}
 	}
 
@@ -3858,7 +3856,6 @@ static int mtip_block_initialize(struct driver_data *dd)
 
 	mtip_hw_debugfs_init(dd);
 
-skip_create_disk:
 	memset(&dd->tags, 0, sizeof(dd->tags));
 	dd->tags.ops = &mtip_mq_ops;
 	dd->tags.nr_hw_queues = 1;
@@ -3888,6 +3885,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	dd->disk->queue		= dd->queue;
 	dd->queue->queuedata	= dd;
 
+skip_create_disk:
 	/* Initialize the protocol layer. */
 	wait_for_rebuild = mtip_hw_get_identify(dd);
 	if (wait_for_rebuild < 0) {
@@ -4048,7 +4046,8 @@ static int mtip_block_remove(struct driver_data *dd)
 		dd->bdev = NULL;
 	}
 	if (dd->disk) {
-		del_gendisk(dd->disk);
+		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
+			del_gendisk(dd->disk);
 		if (dd->disk->queue) {
 			blk_cleanup_queue(dd->queue);
 			blk_mq_free_tag_set(&dd->tags);
@@ -4089,7 +4088,8 @@ static int mtip_block_shutdown(struct driver_data *dd)
 		dev_info(&dd->pdev->dev,
 			"Shutting down %s ...\n", dd->disk->disk_name);
 
-		del_gendisk(dd->disk);
+		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
+			del_gendisk(dd->disk);
 		if (dd->disk->queue) {
 			blk_cleanup_queue(dd->queue);
 			blk_mq_free_tag_set(&dd->tags);

From e2e6e22464ac6fb53e60d74ddf02fb829959ab9c Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:18:10 -0800
Subject: [PATCH 207/797] mtip32xx: Handle safe removal during IO

commit 51c6570eb922146470c2fe660c34585414679bd6 upstream.

Flush inflight IOs using fsync_bdev() when the device is safely
removed. Also, block further IOs in device open function.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Rajesh Kumar Sambandam <rsambandam@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 34 +++++++++++++++++++++++++++++--
 drivers/block/mtip32xx/mtip32xx.h |  1 +
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 47d96bdf5f9e..cda3efecab8d 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3602,6 +3602,28 @@ static int mtip_block_getgeo(struct block_device *dev,
 	return 0;
 }
 
+static int mtip_block_open(struct block_device *dev, fmode_t mode)
+{
+	struct driver_data *dd;
+
+	if (dev && dev->bd_disk) {
+		dd = (struct driver_data *) dev->bd_disk->private_data;
+
+		if (dd) {
+			if (test_bit(MTIP_DDF_REMOVAL_BIT,
+							&dd->dd_flag)) {
+				return -ENODEV;
+			}
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+void mtip_block_release(struct gendisk *disk, fmode_t mode)
+{
+}
+
 /*
  * Block device operation function.
  *
@@ -3609,6 +3631,8 @@ static int mtip_block_getgeo(struct block_device *dev,
  * layer.
  */
 static const struct block_device_operations mtip_block_ops = {
+	.open		= mtip_block_open,
+	.release	= mtip_block_release,
 	.ioctl		= mtip_block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= mtip_block_compat_ioctl,
@@ -4434,7 +4458,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	struct driver_data *dd = pci_get_drvdata(pdev);
 	unsigned long flags, to;
 
-	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
+	set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag);
 
 	spin_lock_irqsave(&dev_lock, flags);
 	list_del_init(&dd->online_list);
@@ -4451,12 +4475,18 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	} while (atomic_read(&dd->irq_workers_active) != 0 &&
 		time_before(jiffies, to));
 
+	fsync_bdev(dd->bdev);
+
 	if (atomic_read(&dd->irq_workers_active) != 0) {
 		dev_warn(&dd->pdev->dev,
 			"Completion workers still active!\n");
 	}
 
-	blk_mq_stop_hw_queues(dd->queue);
+	if (dd->sr)
+		blk_mq_stop_hw_queues(dd->queue);
+
+	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
+
 	/* Clean up the block layer. */
 	mtip_block_remove(dd);
 
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 8635239c521f..50af742421e2 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -158,6 +158,7 @@ enum {
 	MTIP_DDF_RESUME_BIT         = 6,
 	MTIP_DDF_INIT_DONE_BIT      = 7,
 	MTIP_DDF_REBUILD_FAILED_BIT = 8,
+	MTIP_DDF_REMOVAL_BIT	    = 9,
 
 	MTIP_DDF_STOP_IO      = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
 				(1 << MTIP_DDF_SEC_LOCK_BIT) |

From f75d029f84a7c85a0d0875506c823ba50b10b3a1 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:18:20 -0800
Subject: [PATCH 208/797] mtip32xx: Handle FTL rebuild failure state during
 device initialization

commit aae4a033868c496adae86fc6f9c3e0c405bbf360 upstream.

Allow device initialization to finish gracefully when it is in
FTL rebuild failure state. Also, recover device out of this state
after successfully secure erasing it.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Vignesh Gunasekaran <vgunasekaran@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index cda3efecab8d..6bec93997948 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -699,7 +699,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			fail_reason = "thermal shutdown";
 		}
 		if (buf[288] == 0xBF) {
-			set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
+			set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag);
 			dev_info(&dd->pdev->dev,
 				"Drive indicates rebuild has failed. Secure erase required.\n");
 			fail_all_ncq_cmds = 1;
@@ -1000,6 +1000,7 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 			(fis->features == 0x27 || fis->features == 0x72 ||
 			 fis->features == 0x62 || fis->features == 0x26))) {
 		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
+		clear_bit(MTIP_DDF_REBUILD_FAILED_BIT, &port->dd->dd_flag);
 		/* Com reset after secure erase or lowlevel format */
 		mtip_restart_port(port);
 		clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
@@ -1166,6 +1167,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 		if ((rv = wait_for_completion_interruptible_timeout(
 				&wait,
 				msecs_to_jiffies(timeout))) <= 0) {
+
 			if (rv == -ERESTARTSYS) { /* interrupted */
 				dev_err(&dd->pdev->dev,
 					"Internal command [%02X] was interrupted after %u ms\n",
@@ -3091,7 +3093,7 @@ static int mtip_hw_get_identify(struct driver_data *dd)
 		if (buf[288] == 0xBF) {
 			dev_info(&dd->pdev->dev,
 				"Drive indicates rebuild has failed.\n");
-			/* TODO */
+			set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag);
 		}
 	}
 
@@ -3694,10 +3696,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 				rq_data_dir(rq))) {
 			return -ENODATA;
 		}
-		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)))
+		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) ||
+			test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)))
 			return -ENODATA;
-		if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
-			return -ENXIO;
 	}
 
 	if (rq->cmd_flags & REQ_DISCARD) {

From d45d26e491c01c98934a7d941343df8477cad38f Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:21:13 -0800
Subject: [PATCH 209/797] mtip32xx: Implement timeout handler

commit abb0ccd185c9e31847709b86192e6c815d1f57ad upstream.

Added timeout handler. Replaced blk_mq_end_request() with
blk_mq_complete_request() to avoid double completion of a request.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Rajesh Kumar Sambandam <rsambandam@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 95 ++++++++++++++++++++++++++++---
 drivers/block/mtip32xx/mtip32xx.h |  7 ++-
 2 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 6bec93997948..2a9001edce1d 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -233,15 +233,9 @@ static void mtip_async_complete(struct mtip_port *port,
 			"Command tag %d failed due to TFE\n", tag);
 	}
 
-	/* Unmap the DMA scatter list entries */
-	dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, cmd->direction);
-
 	rq = mtip_rq_from_tag(dd, tag);
 
-	if (unlikely(cmd->unaligned))
-		up(&port->cmd_slot_unal);
-
-	blk_mq_end_request(rq, status ? -EIO : 0);
+	blk_mq_complete_request(rq, status);
 }
 
 /*
@@ -2896,6 +2890,42 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
 	return -EFAULT;
 }
 
+static void mtip_softirq_done_fn(struct request *rq)
+{
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct driver_data *dd = rq->q->queuedata;
+
+	/* Unmap the DMA scatter list entries */
+	dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents,
+							cmd->direction);
+
+	if (unlikely(cmd->unaligned))
+		up(&dd->port->cmd_slot_unal);
+
+	blk_mq_end_request(rq, rq->errors);
+}
+
+static void mtip_abort_cmd(struct request *req, void *data,
+							bool reserved)
+{
+	struct driver_data *dd = data;
+
+	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
+
+	clear_bit(req->tag, dd->port->cmds_to_issue);
+	req->errors = -EIO;
+	mtip_softirq_done_fn(req);
+}
+
+static void mtip_queue_cmd(struct request *req, void *data,
+							bool reserved)
+{
+	struct driver_data *dd = data;
+
+	set_bit(req->tag, dd->port->cmds_to_issue);
+	blk_abort_request(req);
+}
+
 /*
  * service thread to issue queued commands
  *
@@ -2908,7 +2938,7 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
 static int mtip_service_thread(void *data)
 {
 	struct driver_data *dd = (struct driver_data *)data;
-	unsigned long slot, slot_start, slot_wrap;
+	unsigned long slot, slot_start, slot_wrap, to;
 	unsigned int num_cmd_slots = dd->slot_groups * 32;
 	struct mtip_port *port = dd->port;
 
@@ -2945,6 +2975,32 @@ static int mtip_service_thread(void *data)
 		if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags))
 			goto restart_eh;
 
+		if (test_bit(MTIP_PF_TO_ACTIVE_BIT, &port->flags)) {
+			to = jiffies + msecs_to_jiffies(5000);
+
+			do {
+				mdelay(100);
+			} while (atomic_read(&dd->irq_workers_active) != 0 &&
+				time_before(jiffies, to));
+
+			if (atomic_read(&dd->irq_workers_active) != 0)
+				dev_warn(&dd->pdev->dev,
+					"Completion workers still active!");
+
+			spin_lock(dd->queue->queue_lock);
+			blk_mq_all_tag_busy_iter(*dd->tags.tags,
+							mtip_queue_cmd, dd);
+			spin_unlock(dd->queue->queue_lock);
+
+			set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags);
+
+			if (mtip_device_reset(dd))
+				blk_mq_all_tag_busy_iter(*dd->tags.tags,
+							mtip_abort_cmd, dd);
+
+			clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
+		}
+
 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
 			slot = 1;
 			/* used to restrict the loop to one iteration */
@@ -3810,11 +3866,33 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
 	return 0;
 }
 
+static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
+								bool reserved)
+{
+	struct driver_data *dd = req->q->queuedata;
+	int ret = BLK_EH_RESET_TIMER;
+
+	if (reserved)
+		goto exit_handler;
+
+	if (test_bit(req->tag, dd->port->cmds_to_issue))
+		goto exit_handler;
+
+	if (test_and_set_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags))
+		goto exit_handler;
+
+	wake_up_interruptible(&dd->port->svc_wait);
+exit_handler:
+	return ret;
+}
+
 static struct blk_mq_ops mtip_mq_ops = {
 	.queue_rq	= mtip_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_request	= mtip_init_cmd,
 	.exit_request	= mtip_free_cmd,
+	.complete	= mtip_softirq_done_fn,
+	.timeout        = mtip_cmd_timeout,
 };
 
 /*
@@ -3890,6 +3968,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	dd->tags.numa_node = dd->numa_node;
 	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
 	dd->tags.driver_data = dd;
+	dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;
 
 	rv = blk_mq_alloc_tag_set(&dd->tags);
 	if (rv) {
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 50af742421e2..7617888f7944 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -134,10 +134,12 @@ enum {
 	MTIP_PF_EH_ACTIVE_BIT       = 1, /* error handling */
 	MTIP_PF_SE_ACTIVE_BIT       = 2, /* secure erase */
 	MTIP_PF_DM_ACTIVE_BIT       = 3, /* download microcde */
+	MTIP_PF_TO_ACTIVE_BIT       = 9, /* timeout handling */
 	MTIP_PF_PAUSE_IO      =	((1 << MTIP_PF_IC_ACTIVE_BIT) |
 				(1 << MTIP_PF_EH_ACTIVE_BIT) |
 				(1 << MTIP_PF_SE_ACTIVE_BIT) |
-				(1 << MTIP_PF_DM_ACTIVE_BIT)),
+				(1 << MTIP_PF_DM_ACTIVE_BIT) |
+				(1 << MTIP_PF_TO_ACTIVE_BIT)),
 
 	MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
 	MTIP_PF_ISSUE_CMDS_BIT      = 5,
@@ -147,7 +149,8 @@ enum {
 	MTIP_PF_SVC_THD_WORK	= ((1 << MTIP_PF_EH_ACTIVE_BIT) |
 				  (1 << MTIP_PF_ISSUE_CMDS_BIT) |
 				  (1 << MTIP_PF_REBUILD_BIT) |
-				  (1 << MTIP_PF_SVC_THD_STOP_BIT)),
+				  (1 << MTIP_PF_SVC_THD_STOP_BIT) |
+				  (1 << MTIP_PF_TO_ACTIVE_BIT)),
 
 	/* below are bit numbers in 'dd_flag' defined in driver_data */
 	MTIP_DDF_SEC_LOCK_BIT	    = 0,

From 2c46344a83ea7265d391e978711c07fc6380d8d7 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Wed, 24 Feb 2016 21:21:20 -0800
Subject: [PATCH 210/797] mtip32xx: Cleanup queued requests after surprise
 removal

commit 008e56d200225321371748d95908e6222436f06d upstream.

Fail all pending requests after surprise removal of a drive.

Signed-off-by: Vignesh Gunasekaran <vgunasekaran@micron.com>
Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/mtip32xx/mtip32xx.c | 78 ++++++++++++++++++++++++-------
 1 file changed, 60 insertions(+), 18 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 2a9001edce1d..55d3d1da72de 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,13 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
 	struct request *rq;
 
+	if (mtip_check_surprise_removal(dd->pdev))
+		return NULL;
+
 	rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
+	if (IS_ERR(rq))
+		return NULL;
+
 	return blk_mq_rq_to_pdu(rq);
 }
 
@@ -575,6 +581,8 @@ static void mtip_completion(struct mtip_port *port,
 		dev_warn(&port->dd->pdev->dev,
 			"Internal command %d completed with TFE\n", tag);
 
+	command->comp_func = NULL;
+	command->comp_data = NULL;
 	complete(waiting);
 }
 
@@ -1009,12 +1017,14 @@ static bool mtip_pause_ncq(struct mtip_port *port,
  *
  * @port    Pointer to port data structure
  * @timeout Max duration to wait (ms)
+ * @atomic  gfp_t flag to indicate blockable context or not
  *
  * return value
  *	0	Success
  *	-EBUSY  Commands still active
  */
-static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
+static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout,
+								gfp_t atomic)
 {
 	unsigned long to;
 	unsigned int n;
@@ -1025,16 +1035,21 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 	to = jiffies + msecs_to_jiffies(timeout);
 	do {
 		if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
-			test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
+			test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) &&
+			atomic == GFP_KERNEL) {
 			msleep(20);
 			continue; /* svc thd is actively issuing commands */
 		}
 
-		msleep(100);
+		if (atomic == GFP_KERNEL)
+			msleep(100);
+		else {
+			cpu_relax();
+			udelay(100);
+		}
+
 		if (mtip_check_surprise_removal(port->dd->pdev))
 			goto err_fault;
-		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
-			goto err_fault;
 
 		/*
 		 * Ignore s_active bit 0 of array element 0.
@@ -1096,6 +1111,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	}
 
 	int_cmd = mtip_get_int_command(dd);
+	if (!int_cmd) {
+		dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n");
+		return -EFAULT;
+	}
 
 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 
@@ -1108,7 +1127,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 		if (fis->command != ATA_CMD_STANDBYNOW1) {
 			/* wait for io to complete if non atomic */
 			if (mtip_quiesce_io(port,
-					MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) {
+				MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) {
 				dev_warn(&dd->pdev->dev,
 					"Failed to quiesce IO\n");
 				mtip_put_int_command(dd, int_cmd);
@@ -3354,10 +3373,6 @@ static int mtip_standby_drive(struct driver_data *dd)
  */
 static int mtip_hw_exit(struct driver_data *dd)
 {
-	/*
-	 * Send standby immediate (E0h) to the drive so that it
-	 * saves its state.
-	 */
 	if (!dd->sr) {
 		/* de-initialize the port. */
 		mtip_deinit_port(dd->port);
@@ -3974,7 +3989,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	if (rv) {
 		dev_err(&dd->pdev->dev,
 			"Unable to allocate request queue\n");
-		goto block_queue_alloc_init_error;
+		goto block_queue_alloc_tag_error;
 	}
 
 	/* Allocate the request queue. */
@@ -4086,8 +4101,9 @@ static int mtip_block_initialize(struct driver_data *dd)
 read_capacity_error:
 init_hw_cmds_error:
 	blk_cleanup_queue(dd->queue);
-	blk_mq_free_tag_set(&dd->tags);
 block_queue_alloc_init_error:
+	blk_mq_free_tag_set(&dd->tags);
+block_queue_alloc_tag_error:
 	mtip_hw_debugfs_exit(dd);
 disk_index_error:
 	spin_lock(&rssd_index_lock);
@@ -4104,6 +4120,22 @@ static int mtip_block_initialize(struct driver_data *dd)
 	return rv;
 }
 
+static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
+{
+	struct driver_data *dd = (struct driver_data *)data;
+	struct mtip_cmd *cmd;
+
+	if (likely(!reserv))
+		blk_mq_complete_request(rq, -ENODEV);
+	else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
+
+		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
+		if (cmd->comp_func)
+			cmd->comp_func(dd->port, MTIP_TAG_INTERNAL,
+					cmd, -ENODEV);
+	}
+}
+
 /*
  * Block layer deinitialization function.
  *
@@ -4135,12 +4167,23 @@ static int mtip_block_remove(struct driver_data *dd)
 		}
 	}
 
-	if (!dd->sr)
-		mtip_standby_drive(dd);
+	if (!dd->sr) {
+		/*
+		 * Explicitly wait here for IOs to quiesce,
+		 * as mtip_standby_drive usually won't wait for IOs.
+		 */
+		if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS,
+								GFP_KERNEL))
+			mtip_standby_drive(dd);
+	}
 	else
 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
 						dd->disk->disk_name);
 
+	blk_mq_freeze_queue_start(dd->queue);
+	blk_mq_stop_hw_queues(dd->queue);
+	blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd);
+
 	/*
 	 * Delete our gendisk structure. This also removes the device
 	 * from /dev
@@ -4555,16 +4598,15 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	} while (atomic_read(&dd->irq_workers_active) != 0 &&
 		time_before(jiffies, to));
 
-	fsync_bdev(dd->bdev);
+	if (!dd->sr)
+		fsync_bdev(dd->bdev);
 
 	if (atomic_read(&dd->irq_workers_active) != 0) {
 		dev_warn(&dd->pdev->dev,
 			"Completion workers still active!\n");
 	}
 
-	if (dd->sr)
-		blk_mq_stop_hw_queues(dd->queue);
-
+	blk_set_queue_dying(dd->queue);
 	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
 
 	/* Clean up the block layer. */

From bbb4bee779b3301a3ab7924f85f38b96bdf1fd4a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 10 Mar 2016 11:33:43 +0100
Subject: [PATCH 211/797] ALSA: hda - Apply reboot D3 fix for CX20724 codec,
 too

commit 56dc66ff1c6d71f9a38c4a7c000b72b921fe4c89 upstream.

Just like CX20722, the CX20724 codec also requires a power down at reboot
in order to reduce the noise at reboot/shutdown.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=113511
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_conexant.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index ef198903c0c3..600af5878e75 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -204,8 +204,13 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 {
 	struct conexant_spec *spec = codec->spec;
 
-	if (codec->core.vendor_id != 0x14f150f2)
+	switch (codec->core.vendor_id) {
+	case 0x14f150f2: /* CX20722 */
+	case 0x14f150f4: /* CX20724 */
+		break;
+	default:
 		return;
+	}
 
 	/* Turn the CX20722 codec into D3 to avoid spurious noises
 	   from the internal speaker during (and after) reboot */

From ae8168541087b26f946f31555e977c146aba637a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 10 Mar 2016 20:56:20 +0100
Subject: [PATCH 212/797] ALSA: pcm: Avoid "BUG:" string for warnings again

commit 0ab1ace856205d10cbc1924b2d931c01ffd216a6 upstream.

The commit [d507941beb1e: ALSA: pcm: Correct PCM BUG error message]
changed the warning prefix back to "BUG:" because the previous prefix
was wrong.  But a kernel message containing "BUG:" seems to be wrongly
taken as an Oops message by some brain-dead daemons, and it annoys users
in the end.  Instead of teaching daemons, change the string again to a
more reasonable one.

Fixes: d507941beb1e ('ALSA: pcm: Correct PCM BUG error message')
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/pcm_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 6b5a811e01a5..3a9b66c6e09c 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -322,7 +322,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream,
 			char name[16];
 			snd_pcm_debug_name(substream, name, sizeof(name));
 			pcm_err(substream->pcm,
-				"BUG: %s, pos = %ld, buffer size = %ld, period size = %ld\n",
+				"invalid position: %s, pos = %ld, buffer size = %ld, period size = %ld\n",
 				name, pos, runtime->buffer_size,
 				runtime->period_size);
 		}

From 5fae159b7d25987747919ccb73a8813da81abd97 Mon Sep 17 00:00:00 2001
From: "Vittorio Gambaletta (VittGam)" <linuxbugs@vittgam.net>
Date: Sun, 13 Mar 2016 22:19:34 +0100
Subject: [PATCH 213/797] ALSA: intel8x0: Add clock quirk entry for AD1981B on
 IBM ThinkPad X41.

commit 4061db03dd71d195b9973ee466f6ed32f6a3fc16 upstream.

The clock measurement on the AC'97 audio card found in the IBM ThinkPad X41
will often fail, so add a quirk entry to fix it.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=441087
Signed-off-by: Vittorio Gambaletta <linuxbugs@vittgam.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/intel8x0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 42bcbac801a3..ccdab29a8b66 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -2879,6 +2879,7 @@ static void intel8x0_measure_ac97_clock(struct intel8x0 *chip)
 
 static struct snd_pci_quirk intel8x0_clock_list[] = {
 	SND_PCI_QUIRK(0x0e11, 0x008a, "AD1885", 41000),
+	SND_PCI_QUIRK(0x1014, 0x0581, "AD1981B", 48000),
 	SND_PCI_QUIRK(0x1028, 0x00be, "AD1885", 44100),
 	SND_PCI_QUIRK(0x1028, 0x0177, "AD1980", 48000),
 	SND_PCI_QUIRK(0x1028, 0x01ad, "AD1981B", 48000),

From ee2a37ab39b013589647ced2e6526c7358cb2111 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 10 Mar 2016 12:02:49 +0100
Subject: [PATCH 214/797] ALSA: hda - Don't handle ELD notify from invalid port

commit 4f8e4f3537cafc4de128e6bfdf83baa78bc60eb1 upstream.

The current Intel HDMI codec driver supports only three fixed ports
from port B to port D.  However, i915 driver may assign a DP on other
ports, e.g. port A, when no eDP is used.  This incompatibility is
caught later at pin_nid_to_pin_index() and results in a warning
message like "HDMI: pin nid 4 not registered" at each time.

This patch filters out such invalid events beforehand, so that the
kernel won't be too grumbling.

Reported-by: Stefan Assmann <sassmann@kpanic.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_hdmi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 70c945603379..f7bcd8dbac14 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2353,6 +2353,10 @@ static void intel_pin_eld_notify(void *audio_ptr, int port)
 	struct hda_codec *codec = audio_ptr;
 	int pin_nid = port + 0x04;
 
+	/* we assume only from port-B to port-D */
+	if (port < 1 || port > 3)
+		return;
+
 	/* skip notification during system suspend (but not in runtime PM);
 	 * the state will be updated at resume
 	 */

From db894649e34abceade484c35e5acd346e74e916d Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 11 Mar 2016 12:04:02 +0800
Subject: [PATCH 215/797] ALSA: hda - fix the mic mute button and led problem
 for a Lenovo AIO

commit 6ef2f68fa38bf415830f67903d87180d933e0f47 upstream.

This Lenovo ThinkCentre AIO also uses Line2 as mic mute button and
uses GPIO2 to control the mic mute led, so applying this quirk can
make both the button and led work.

BugLink: https://bugs.launchpad.net/bugs/1555912
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index c2430b36e1ce..6968b796baa3 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5529,6 +5529,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP),

From 1dac534145d0f84b3fcadf5b69d8de3ad147f471 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 15 Mar 2016 16:44:55 +0100
Subject: [PATCH 216/797] ALSA: hda - Fix unconditional GPIO toggle via
 automute

commit 1f7c6658962fa1260c1658d681bd6bb0c746b99a upstream.

Cirrus HD-audio driver may adjust GPIO pins for EAPD dynamically
depending on the jack plug state.  This works fine for the auto-mute
mode where the speaker gets muted upon the HP jack plug.   OTOH, when
the auto-mute mode is off, this turns off the EAPD unexpectedly
depending on the jack state, which results in the silent speaker
output.

This patch fixes the silent speaker output issue by setting GPIO bits
constantly when the auto-mute mode is off.

Reported-and-tested-by: moosotc@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_cirrus.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index c1c855a6c0af..a47e8ae0eb30 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -174,8 +174,12 @@ static void cs_automute(struct hda_codec *codec)
 	snd_hda_gen_update_outputs(codec);
 
 	if (spec->gpio_eapd_hp || spec->gpio_eapd_speaker) {
-		spec->gpio_data = spec->gen.hp_jack_present ?
-			spec->gpio_eapd_hp : spec->gpio_eapd_speaker;
+		if (spec->gen.automute_speaker)
+			spec->gpio_data = spec->gen.hp_jack_present ?
+				spec->gpio_eapd_hp : spec->gpio_eapd_speaker;
+		else
+			spec->gpio_data =
+				spec->gpio_eapd_hp | spec->gpio_eapd_speaker;
 		snd_hda_codec_write(codec, 0x01, 0,
 				    AC_VERB_SET_GPIO_DATA, spec->gpio_data);
 	}

From d2a70d6055c5da54d3f45805a19c37a693acd07f Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Wed, 27 Jan 2016 22:29:33 -0800
Subject: [PATCH 217/797] tools/hv: Use include/uapi with __EXPORTED_HEADERS__

commit 50fe6dd10069e7c062e27f29606f6e91ea979399 upstream.

Use the local uapi headers to keep in sync with "recently" added #define's
(e.g. VSS_OP_REGISTER1).

Fixes: 3eb2094c59e8 ("Adding makefile for tools/hv")
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/hv/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index a8ab79556926..a8c4644022a6 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -5,6 +5,8 @@ PTHREAD_LIBS = -lpthread
 WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS)
 
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
 all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^

From 93272beafa9d9a5933590c90d2fa525e86e67032 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Wed, 9 Mar 2016 23:47:25 -0500
Subject: [PATCH 218/797] jbd2: fix FS corruption possibility in
 jbd2_journal_destroy() on umount path

commit c0a2ad9b50dd80eeccd73d9ff962234590d5ec93 upstream.

On the umount path, jbd2_journal_destroy() writes the latest transaction
ID (->j_tail_sequence) to be used at the next mount.

The bug is that ->j_tail_sequence does not hold the latest transaction ID
in some cases. So, at the next mount, new transaction IDs may conflict
with the remaining (not yet overwritten) transactions.

	mount (id=10)
	write transaction (id=11)
	write transaction (id=12)
	umount (id=10) <= the bug doesn't write latest ID

	mount (id=10)
	write transaction (id=11)
	crash

	mount
	[recovery process]
		transaction (id=11)
		transaction (id=12) <= valid transaction ID, but old commit
                                       must not replay

As shown above, this bug can cause recovery failure or FS
corruption.

So why doesn't ->j_tail_sequence point to the latest ID?

Because if checkpoint transactions were reclaimed due to memory pressure
(i.e. bdev_try_to_free_page()), then ->j_tail_sequence is not updated.
(Another case is when __jbd2_journal_clean_checkpoint_list() is called
with an empty transaction.)

So in the above cases, ->j_tail_sequence does not point to the latest
transaction ID on the umount path. In addition, REQ_FLUSH for the
checkpoint is not issued either.

So, to fix this problem with minimal changes, this patch updates
->j_tail_sequence and issues REQ_FLUSH.  (With more complex changes,
some optimizations would be possible, for example to avoid an
unnecessary REQ_FLUSH.)

BTW,

	journal->j_tail_sequence =
		++journal->j_transaction_sequence;

Increment of ->j_transaction_sequence seems to be unnecessary, but
ext3 does this.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jbd2/journal.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 81e622681c82..624a57a9c4aa 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1408,11 +1408,12 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 /**
  * jbd2_mark_journal_empty() - Mark on disk journal as empty.
  * @journal: The journal to update.
+ * @write_op: With which operation should we write the journal sb
  *
  * Update a journal's dynamic superblock fields to show that journal is empty.
  * Write updated superblock to disk waiting for IO to complete.
  */
-static void jbd2_mark_journal_empty(journal_t *journal)
+static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 {
 	journal_superblock_t *sb = journal->j_superblock;
 
@@ -1430,7 +1431,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
 	sb->s_start    = cpu_to_be32(0);
 	read_unlock(&journal->j_state_lock);
 
-	jbd2_write_superblock(journal, WRITE_FUA);
+	jbd2_write_superblock(journal, write_op);
 
 	/* Log is no longer empty */
 	write_lock(&journal->j_state_lock);
@@ -1716,7 +1717,13 @@ int jbd2_journal_destroy(journal_t *journal)
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
 			mutex_lock(&journal->j_checkpoint_mutex);
-			jbd2_mark_journal_empty(journal);
+
+			write_lock(&journal->j_state_lock);
+			journal->j_tail_sequence =
+				++journal->j_transaction_sequence;
+			write_unlock(&journal->j_state_lock);
+
+			jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
 			mutex_unlock(&journal->j_checkpoint_mutex);
 		} else
 			err = -EIO;
@@ -1975,7 +1982,7 @@ int jbd2_journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
-	jbd2_mark_journal_empty(journal);
+	jbd2_mark_journal_empty(journal, WRITE_FUA);
 	mutex_unlock(&journal->j_checkpoint_mutex);
 	write_lock(&journal->j_state_lock);
 	J_ASSERT(!journal->j_running_transaction);
@@ -2021,7 +2028,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
 	if (write) {
 		/* Lock to make assertions happy... */
 		mutex_lock(&journal->j_checkpoint_mutex);
-		jbd2_mark_journal_empty(journal);
+		jbd2_mark_journal_empty(journal, WRITE_FUA);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 

From 1a8f4a490871df59d1f3cfe28fae4458d2cbef7f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 15 Dec 2015 16:38:22 +0100
Subject: [PATCH 219/797] brd: Fix discard request processing

commit 5e4298be45e83ecdffaabb370eea9396889b07f1 upstream.

Avoid that discard requests with size >= PAGE_SIZE fail with
-EIO. Refuse discard requests if the discard size is not a
multiple of the page size.

Fixes: 2dbe54957636 ("brd: Refuse improperly aligned discard requests")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Jan Kara <jack@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Robert Elliot <elliott@hp.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/brd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a5880f4ab40e..1914c63ca8b1 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -338,7 +338,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
 		if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) ||
-		    bio->bi_iter.bi_size & PAGE_MASK)
+		    bio->bi_iter.bi_size & ~PAGE_MASK)
 			goto io_error;
 		discard_from_brd(brd, sector, bio->bi_iter.bi_size);
 		goto out;

From 84512e476ce92fbdb60d4687e3ea230dbf0655c8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 11 Feb 2016 11:03:09 -0800
Subject: [PATCH 220/797] IB/srpt: Simplify srpt_handle_tsk_mgmt()

commit 51093254bf879bc9ce96590400a87897c7498463 upstream.

Let the target core check task existence instead of the SRP target
driver. Additionally, let the target core check the validity of the
task management request instead of the ib_srpt driver.

This patch fixes the following kernel crash:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
IP: [<ffffffffa0565f37>] srpt_handle_new_iu+0x6d7/0x790 [ib_srpt]
Oops: 0002 [#1] SMP
Call Trace:
 [<ffffffffa05660ce>] srpt_process_completion+0xde/0x570 [ib_srpt]
 [<ffffffffa056669f>] srpt_compl_thread+0x13f/0x160 [ib_srpt]
 [<ffffffff8109726f>] kthread+0xcf/0xe0
 [<ffffffff81613cfc>] ret_from_fork+0x7c/0xb0

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Fixes: 3e4f574857ee ("ib_srpt: Convert TMR path to target_submit_tmr")
Tested-by: Alex Estrin <alex.estrin@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c | 59 +--------------------------
 1 file changed, 1 insertion(+), 58 deletions(-)

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2e2fe818ca9f..eaabf3125846 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1737,47 +1737,6 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
 	return -1;
 }
 
-/**
- * srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
- * @ch: RDMA channel of the task management request.
- * @fn: Task management function to perform.
- * @req_tag: Tag of the SRP task management request.
- * @mgmt_ioctx: I/O context of the task management request.
- *
- * Returns zero if the target core will process the task management
- * request asynchronously.
- *
- * Note: It is assumed that the initiator serializes tag-based task management
- * requests.
- */
-static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
-{
-	struct srpt_device *sdev;
-	struct srpt_rdma_ch *ch;
-	struct srpt_send_ioctx *target;
-	int ret, i;
-
-	ret = -EINVAL;
-	ch = ioctx->ch;
-	BUG_ON(!ch);
-	BUG_ON(!ch->sport);
-	sdev = ch->sport->sdev;
-	BUG_ON(!sdev);
-	spin_lock_irq(&sdev->spinlock);
-	for (i = 0; i < ch->rq_size; ++i) {
-		target = ch->ioctx_ring[i];
-		if (target->cmd.se_lun == ioctx->cmd.se_lun &&
-		    target->cmd.tag == tag &&
-		    srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
-			ret = 0;
-			/* now let the target core abort &target->cmd; */
-			break;
-		}
-	}
-	spin_unlock_irq(&sdev->spinlock);
-	return ret;
-}
-
 static int srp_tmr_to_tcm(int fn)
 {
 	switch (fn) {
@@ -1812,7 +1771,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
 	struct se_cmd *cmd;
 	struct se_session *sess = ch->sess;
 	uint64_t unpacked_lun;
-	uint32_t tag = 0;
 	int tcm_tmr;
 	int rc;
 
@@ -1828,25 +1786,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
 	srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
 	send_ioctx->cmd.tag = srp_tsk->tag;
 	tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
-	if (tcm_tmr < 0) {
-		send_ioctx->cmd.se_tmr_req->response =
-			TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
-		goto fail;
-	}
 	unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
 				       sizeof(srp_tsk->lun));
-
-	if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
-		rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
-		if (rc < 0) {
-			send_ioctx->cmd.se_tmr_req->response =
-					TMR_TASK_DOES_NOT_EXIST;
-			goto fail;
-		}
-		tag = srp_tsk->task_tag;
-	}
 	rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
-				srp_tsk, tcm_tmr, GFP_KERNEL, tag,
+				srp_tsk, tcm_tmr, GFP_KERNEL, srp_tsk->task_tag,
 				TARGET_SCF_ACK_KREF);
 	if (rc != 0) {
 		send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;

From 32bb1185093c67c2280e440685ebc6b23fd47743 Mon Sep 17 00:00:00 2001
From: Eric Wheeler <git@linux.ewheeler.net>
Date: Fri, 26 Feb 2016 14:33:56 -0800
Subject: [PATCH 221/797] bcache: cleaned up error handling around
 register_cache()

commit 9b299728ed777428b3908ac72ace5f8f84b97789 upstream.

Fix null pointer dereference by changing register_cache() to return an int
instead of being void.  This allows it to return -ENOMEM or -ENODEV and
enables upper layers to handle the OOM case without NULL pointer issues.

See this thread:
  http://thread.gmane.org/gmane.linux.kernel.bcache.devel/3521

Fixes this error:
  gargamel:/sys/block/md5/bcache# echo /dev/sdh2 > /sys/fs/bcache/register

  bcache: register_cache() error opening sdh2: cannot allocate memory
  BUG: unable to handle kernel NULL pointer dereference at 00000000000009b8
  IP: [<ffffffffc05a7e8d>] cache_set_flush+0x102/0x15c [bcache]
  PGD 120dff067 PUD 1119a3067 PMD 0
  Oops: 0000 [#1] SMP
  Modules linked in: veth ip6table_filter ip6_tables
  (...)
  CPU: 4 PID: 3371 Comm: kworker/4:3 Not tainted 4.4.2-amd64-i915-volpreempt-20160213bc1 #3
  Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013
  Workqueue: events cache_set_flush [bcache]
  task: ffff88020d5dc280 ti: ffff88020b6f8000 task.ti: ffff88020b6f8000
  RIP: 0010:[<ffffffffc05a7e8d>]  [<ffffffffc05a7e8d>] cache_set_flush+0x102/0x15c [bcache]

Signed-off-by: Eric Wheeler <bcache@linux.ewheeler.net>
Tested-by: Marc MERLIN <marc@merlins.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/super.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 8d0ead98eb6e..f3f98c3d7f67 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1828,11 +1828,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 	return 0;
 }
 
-static void register_cache(struct cache_sb *sb, struct page *sb_page,
+static int register_cache(struct cache_sb *sb, struct page *sb_page,
 				struct block_device *bdev, struct cache *ca)
 {
 	char name[BDEVNAME_SIZE];
-	const char *err = "cannot allocate memory";
+	const char *err = NULL;
+	int ret = 0;
 
 	memcpy(&ca->sb, sb, sizeof(struct cache_sb));
 	ca->bdev = bdev;
@@ -1847,27 +1848,35 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
 	if (blk_queue_discard(bdev_get_queue(ca->bdev)))
 		ca->discard = CACHE_DISCARD(&ca->sb);
 
-	if (cache_alloc(sb, ca) != 0)
+	ret = cache_alloc(sb, ca);
+	if (ret != 0)
 		goto err;
 
-	err = "error creating kobject";
-	if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
-		goto err;
+	if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
+		err = "error calling kobject_add";
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	mutex_lock(&bch_register_lock);
 	err = register_cache_set(ca);
 	mutex_unlock(&bch_register_lock);
 
-	if (err)
-		goto err;
+	if (err) {
+		ret = -ENODEV;
+		goto out;
+	}
 
 	pr_info("registered cache device %s", bdevname(bdev, name));
+
 out:
 	kobject_put(&ca->kobj);
-	return;
+
 err:
-	pr_notice("error opening %s: %s", bdevname(bdev, name), err);
-	goto out;
+	if (err)
+		pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+
+	return ret;
 }
 
 /* Global interfaces/init */
@@ -1965,7 +1974,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 		if (!ca)
 			goto err_close;
 
-		register_cache(sb, sb_page, bdev, ca);
+		if (register_cache(sb, sb_page, bdev, ca) != 0)
+			goto err_close;
 	}
 out:
 	if (sb_page)

From b58e781068d9a5fd6b0ee77f595c3dbaa0d2b7aa Mon Sep 17 00:00:00 2001
From: Eric Wheeler <git@linux.ewheeler.net>
Date: Fri, 26 Feb 2016 14:39:06 -0800
Subject: [PATCH 222/797] bcache: fix race of writeback thread starting before
 complete initialization

commit 07cc6ef8edc47f8b4fc1e276d31127a0a5863d4d upstream.

The bch_writeback_thread might BUG_ON in read_dirty() if
dc->sb==BDEV_STATE_DIRTY and bch_sectors_dirty_init has not yet completed
its related initialization.  This patch downs the dc->writeback_lock until
after initialization is complete, thus preventing bch_writeback_thread
from proceeding prematurely.

See this thread:
  http://thread.gmane.org/gmane.linux.kernel.bcache.devel/3453

Signed-off-by: Eric Wheeler <bcache@linux.ewheeler.net>
Tested-by: Marc MERLIN <marc@merlins.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/super.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f3f98c3d7f67..6b07a0c8c729 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1015,8 +1015,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	 */
 	atomic_set(&dc->count, 1);
 
-	if (bch_cached_dev_writeback_start(dc))
+	/* Block writeback thread, but spawn it */
+	down_write(&dc->writeback_lock);
+	if (bch_cached_dev_writeback_start(dc)) {
+		up_write(&dc->writeback_lock);
 		return -ENOMEM;
+	}
 
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
 		bch_sectors_dirty_init(dc);
@@ -1028,6 +1032,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	bch_cached_dev_run(dc);
 	bcache_device_link(&dc->disk, c, "bdev");
 
+	/* Allow the writeback thread to proceed */
+	up_write(&dc->writeback_lock);
+
 	pr_info("Caching %s as %s on set %pU",
 		bdevname(dc->bdev, buf), dc->disk.disk->disk_name,
 		dc->disk.c->sb.set_uuid);

From ca75edc44088cc40792161fd2ca650c5cfe8ee9f Mon Sep 17 00:00:00 2001
From: Eric Wheeler <git@linux.ewheeler.net>
Date: Mon, 7 Mar 2016 15:17:50 -0800
Subject: [PATCH 223/797] bcache: fix cache_set_flush() NULL pointer
 dereference on OOM

commit f8b11260a445169989d01df75d35af0f56178f95 upstream.

When bch_cache_set_alloc() fails to kzalloc the cache_set, the
asynchronous closure handling tries to dereference a cache_set that
hadn't yet been allocated inside of cache_set_flush() which is called
by __cache_set_unregister() during cleanup.  This appears to happen only
during an OOM condition on bcache_register.

Signed-off-by: Eric Wheeler <bcache@linux.ewheeler.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/bcache/super.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 6b07a0c8c729..a296425a7270 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1373,6 +1373,9 @@ static void cache_set_flush(struct closure *cl)
 	struct btree *b;
 	unsigned i;
 
+	if (!c)
+		closure_return(cl);
+
 	bch_cache_accounting_destroy(&c->accounting);
 
 	kobject_put(&c->internal);

From 8b42fc47e1b64cb661fed8d96f874effbdf1d7f1 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 17 Mar 2016 14:20:25 -0700
Subject: [PATCH 224/797] mm: memcontrol: reclaim when shrinking memory.high
 below usage

commit 588083bb37a3cea8533c392370a554417c8f29cb upstream.

When setting memory.high below usage, nothing happens until the next
charge comes along, and then it will only reclaim its own charge and not
the now potentially huge excess of the new memory.high.  This can cause
groups to stay in excess of their memory.high indefinitely.

To fix that, when shrinking memory.high, kick off a reclaim cycle that
goes after the delta.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ee6acd279953..5d081ff28663 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5121,6 +5121,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 				 char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long nr_pages;
 	unsigned long high;
 	int err;
 
@@ -5131,6 +5132,11 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 
 	memcg->high = high;
 
+	nr_pages = page_counter_read(&memcg->memory);
+	if (nr_pages > high)
+		try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
+					     GFP_KERNEL, true);
+
 	memcg_wb_domain_size_changed(memcg);
 	return nbytes;
 }

From 0ccab5b139971a2a3f48df24d1ee8be2dbf84042 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 17 Mar 2016 14:20:28 -0700
Subject: [PATCH 225/797] mm: memcontrol: reclaim and OOM kill when shrinking
 memory.max below usage

commit b6e6edcfa40561e9c8abe5eecf1c96f8e5fd9c6f upstream.

Setting the original memory.limit_in_bytes hardlimit is subject to a
race condition when the desired value is below the current usage.  The
code tries a few times to first reclaim and then see if the usage has
dropped to where we would like it to be, but there is no locking, and
the workload is free to continue making new charges up to the old limit.
Thus, attempting to shrink a workload relies on pure luck and hope that
the workload happens to cooperate.

To fix this in the cgroup2 memory.max knob, do it the other way round:
set the limit first, then try enforcement.  And if reclaim is not able
to succeed, trigger OOM kills in the group.  Keep going until the new
limit is met, we run out of OOM victims and there's only unreclaimable
memory left, or the task writing to memory.max is killed.  This allows
users to shrink groups reliably, and the behavior is consistent with
what happens when new charges are attempted in excess of memory.max.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5d081ff28663..fc0bcc41d57f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1332,7 +1332,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return limit;
 }
 
-static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order)
 {
 	struct oom_control oc = {
@@ -1410,6 +1410,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	}
 unlock:
 	mutex_unlock(&oom_lock);
+	return chosen;
 }
 
 #if MAX_NUMNODES > 1
@@ -5158,6 +5159,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 				char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES;
+	bool drained = false;
 	unsigned long max;
 	int err;
 
@@ -5166,9 +5169,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 	if (err)
 		return err;
 
-	err = mem_cgroup_resize_limit(memcg, max);
-	if (err)
-		return err;
+	xchg(&memcg->memory.limit, max);
+
+	for (;;) {
+		unsigned long nr_pages = page_counter_read(&memcg->memory);
+
+		if (nr_pages <= max)
+			break;
+
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+
+		if (!drained) {
+			drain_all_stock(memcg);
+			drained = true;
+			continue;
+		}
+
+		if (nr_reclaims) {
+			if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
+							  GFP_KERNEL, true))
+				nr_reclaims--;
+			continue;
+		}
+
+		mem_cgroup_events(memcg, MEMCG_OOM, 1);
+		if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
+			break;
+	}
 
 	memcg_wb_domain_size_changed(memcg);
 	return nbytes;

From ee52f62c636c0c151ea92ffbf5ef940be51b0d22 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 17 Mar 2016 14:17:16 -0700
Subject: [PATCH 226/797] ia64: define ioremap_uc()

commit b0f84ac352762ed02d7ea9f284942a8cab7f9077 upstream.

All architectures now need ioremap_uc(). ia64 already seems to provide this
through its ioremap_nocache(), which ensures that it *only* uses UC.

This has been needed since v4.3 to complete an allyesconfig compile on ia64.
Other archs needed this as well, and this one seems to have
fallen through the cracks.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/ia64/include/asm/io.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 9041bbe2b7b4..8fdb9c7eeb66 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -436,6 +436,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo
 	return ioremap(phys_addr, size);
 }
 #define ioremap_cache ioremap_cache
+#define ioremap_uc ioremap_nocache
 
 
 /*

From 5f4a82d5e3492c26fb0263ca7f007180612e8b54 Mon Sep 17 00:00:00 2001
From: Joshua Hunt <johunt@akamai.com>
Date: Thu, 17 Mar 2016 14:17:23 -0700
Subject: [PATCH 227/797] watchdog: don't run proc_watchdog_update if new value
 is same as old

commit a1ee1932aa6bea0bb074f5e3ced112664e4637ed upstream.

While working on a script to restore all sysctl params before a series of
tests I found that writing any value into the
/proc/sys/kernel/{nmi_watchdog,soft_watchdog,watchdog,watchdog_thresh}
causes them to call proc_watchdog_update().

  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.

There doesn't appear to be a reason for doing this work every time a write
occurs, so only do it when the values change.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/watchdog.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 18f34cf75f74..198137b1cadc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -907,6 +907,9 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
 		 * both lockup detectors are disabled if proc_watchdog_update()
 		 * returns an error.
 		 */
+		if (old == new)
+			goto out;
+
 		err = proc_watchdog_update();
 	}
 out:
@@ -951,7 +954,7 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
 int proc_watchdog_thresh(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int err, old;
+	int err, old, new;
 
 	get_online_cpus();
 	mutex_lock(&watchdog_proc_mutex);
@@ -971,6 +974,10 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
 	/*
 	 * Update the sample period. Restore on failure.
 	 */
+	new = ACCESS_ONCE(watchdog_thresh);
+	if (old == new)
+		goto out;
+
 	set_sample_period();
 	err = proc_watchdog_update();
 	if (err) {

From 6a4cdbf56201f983f8177e3845fcb47d25d4fcb0 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 28 Feb 2016 17:44:09 +0200
Subject: [PATCH 228/797] watchdog: rc32434_wdt: fix ioctl error handling

commit 10e7ac22cdd4d211cef99afcb9371b70cb175be6 upstream.

Calling return copy_to_user(...) in an ioctl will not do the right thing
if there's a pagefault: copy_to_user returns the number of bytes not
copied in this case.

Fix up watchdog/rc32434_wdt to do
	return copy_to_user(...) ? -EFAULT : 0;

instead.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/watchdog/rc32434_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c
index 71e78ef4b736..3a75f3b53452 100644
--- a/drivers/watchdog/rc32434_wdt.c
+++ b/drivers/watchdog/rc32434_wdt.c
@@ -237,7 +237,7 @@ static long rc32434_wdt_ioctl(struct file *file, unsigned int cmd,
 			return -EINVAL;
 		/* Fall through */
 	case WDIOC_GETTIMEOUT:
-		return copy_to_user(argp, &timeout, sizeof(int));
+		return copy_to_user(argp, &timeout, sizeof(int)) ? -EFAULT : 0;
 	default:
 		return -ENOTTY;
 	}

From 7435429a8a7f3a83f760a1f72958ee53db408e27 Mon Sep 17 00:00:00 2001
From: Dmitry Tunin <hanipouspilot@gmail.com>
Date: Wed, 10 Feb 2016 00:49:11 +0300
Subject: [PATCH 229/797] Bluetooth: Add new AR3012 ID 0489:e095

commit 28c971d82fb58ef7cba22e5308be6d2d2590473d upstream.

T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=02 Dev#= 3 Spd=12 MxCh= 0
D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1
P: Vendor=0489 ProdID=e095 Rev=00.01
C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb

This device requires ar3k/AthrBT_0x31010100.dfu and
ar3k/ramps_0x31010100_40.dfu firmware files that are not in
linux-firmware yet.

BugLink: https://bugs.launchpad.net/bugs/1542944

Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/ath3k.c | 2 ++
 drivers/bluetooth/btusb.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 0c4a748fef7a..0beaa52df66b 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -82,6 +82,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x0489, 0xe05f) },
 	{ USB_DEVICE(0x0489, 0xe076) },
 	{ USB_DEVICE(0x0489, 0xe078) },
+	{ USB_DEVICE(0x0489, 0xe095) },
 	{ USB_DEVICE(0x04c5, 0x1330) },
 	{ USB_DEVICE(0x04CA, 0x3004) },
 	{ USB_DEVICE(0x04CA, 0x3005) },
@@ -147,6 +148,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0489, 0xe095), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 342ec8d203e3..79107597a594 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -196,6 +196,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0489, 0xe095), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 },

From f0de3cec40ee0f365751e11a477e47dd5568c34e Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 11 Mar 2016 09:56:33 +0200
Subject: [PATCH 230/797] Bluetooth: Fix potential buffer overflow with Add
 Advertising

commit 6a0e78072c2ae7b20b14e0249d8108441ea928d2 upstream.

The Add Advertising command handler does the appropriate checks for
the AD and Scan Response data, however fails to take into account the
general length of the mgmt command itself, which could lead to
potential buffer overflows. This patch adds the necessary check that
the mgmt command length is consistent with the given ad and scan_rsp
lengths.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/mgmt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7f22119276f3..b1b0a1c0bd8d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7155,6 +7155,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
 				       status);
 
+	if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len)
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
+				       MGMT_STATUS_INVALID_PARAMS);
+
 	flags = __le32_to_cpu(cp->flags);
 	timeout = __le16_to_cpu(cp->timeout);
 	duration = __le16_to_cpu(cp->duration);

From 36591ef19ab6e82cfb3580880ba1fefd843aa8ed Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 15 Mar 2016 20:43:04 -0400
Subject: [PATCH 231/797] cgroup: ignore css_sets associated with dead cgroups
 during migration

commit 2b021cbf3cb6208f0d40fd2f1869f237934340ed upstream.

Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their
original cgroups"), all dead tasks were associated with init_css_set.
If a zombie task is requested for migration, while migration prep
operations would still be performed on init_css_set, the actual
migration would ignore zombie tasks.  As init_css_set is always valid,
this worked fine.

However, after 2e91fa7f6d45, a zombie task stays with the css_set it was
associated with at the time of death.  Let's say a task T is associated
with cgroup A on hierarchy H-1 and cgroup B on hierarchy H-2.  After T
becomes a zombie, it would still remain associated with A and B.  If A
only contains zombie tasks, it can be removed.  On removal, A gets
marked offline but stays pinned until all zombies are drained.  At
this point, if migration is initiated on T to a cgroup C on hierarchy
H-2, migration path would try to prepare T's css_set for migration and
trigger the following.

 WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160()
 CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289
 ...
 Call Trace:
  [<ffffffff8127e63c>] dump_stack+0x4e/0x82
  [<ffffffff810445e8>] warn_slowpath_common+0x78/0xb0
  [<ffffffff810446d5>] warn_slowpath_null+0x15/0x20
  [<ffffffff810c33e1>] cgroup_get+0x121/0x160
  [<ffffffff810c349b>] link_css_set+0x7b/0x90
  [<ffffffff810c4fbc>] find_css_set+0x3bc/0x5e0
  [<ffffffff810c5269>] cgroup_migrate_prepare_dst+0x89/0x1f0
  [<ffffffff810c7547>] cgroup_attach_task+0x157/0x230
  [<ffffffff810c7a17>] __cgroup_procs_write+0x2b7/0x470
  [<ffffffff810c7bdc>] cgroup_tasks_write+0xc/0x10
  [<ffffffff810c4790>] cgroup_file_write+0x30/0x1b0
  [<ffffffff811c68fc>] kernfs_fop_write+0x13c/0x180
  [<ffffffff81151673>] __vfs_write+0x23/0xe0
  [<ffffffff81152494>] vfs_write+0xa4/0x1a0
  [<ffffffff811532d4>] SyS_write+0x44/0xa0
  [<ffffffff814af2d7>] entry_SYSCALL_64_fastpath+0x12/0x6f

It doesn't make sense to prepare migration for css_sets pointing to
dead cgroups as they are guaranteed to contain only zombies which are
ignored later during migration.  This patch makes cgroup destruction
path mark all affected css_sets as dead and updates the migration path
to ignore them during preparation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cgroup-defs.h |  3 +++
 kernel/cgroup.c             | 20 ++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8e30faeab183..a7c7f74808a4 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -216,6 +216,9 @@ struct css_set {
 	/* all css_task_iters currently walking this cset */
 	struct list_head task_iters;
 
+	/* dead and being drained, ignore for migration */
+	bool dead;
+
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fb1ecfd2decd..dc94f8beb097 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2498,6 +2498,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset,
 	lockdep_assert_held(&cgroup_mutex);
 	lockdep_assert_held(&css_set_lock);
 
+	/*
+	 * If ->dead, @src_set is associated with one or more dead cgroups
+	 * and doesn't contain any migratable tasks.  Ignore it early so
+	 * that the rest of migration path doesn't get confused by it.
+	 */
+	if (src_cset->dead)
+		return;
+
 	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
 
 	if (!list_empty(&src_cset->mg_preload_node))
@@ -5131,6 +5139,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
 	struct cgroup_subsys_state *css;
+	struct cgrp_cset_link *link;
 	int ssid;
 
 	lockdep_assert_held(&cgroup_mutex);
@@ -5151,11 +5160,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 		return -EBUSY;
 
 	/*
-	 * Mark @cgrp dead.  This prevents further task migration and child
-	 * creation by disabling cgroup_lock_live_group().
+	 * Mark @cgrp and the associated csets dead.  The former prevents
+	 * further task migration and child creation by disabling
+	 * cgroup_lock_live_group().  The latter makes the csets ignored by
+	 * the migration path.
 	 */
 	cgrp->self.flags &= ~CSS_ONLINE;
 
+	spin_lock_bh(&css_set_lock);
+	list_for_each_entry(link, &cgrp->cset_links, cset_link)
+		link->cset->dead = true;
+	spin_unlock_bh(&css_set_lock);
+
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);

From 9b4a50fae597168ed1eb10e0ca60e73ac649963f Mon Sep 17 00:00:00 2001
From: Dmitri Epshtein <dima@marvell.com>
Date: Sat, 12 Mar 2016 18:44:18 +0100
Subject: [PATCH 232/797] net: mvneta: enable change MAC address when interface
 is up

commit 928b6519afeb2a5e2dc61154380b545ed66c476a upstream.

Function eth_prepare_mac_addr_change() is called as part of a MAC
address change. This function checks whether the interface is running.
To allow changing the MAC address while the interface is running, the
IFF_LIVE_ADDR_CHANGE flag must be set in the dev->priv_flags field.

Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP
network unit")
Signed-off-by: Dmitri Epshtein <dima@marvell.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index ed622fa29dfa..a4ac6fedac75 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3404,7 +3404,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->hw_features |= dev->features;
 	dev->vlan_features |= dev->features;
-	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
 	dev->gso_max_segs = MVNETA_MAX_TSO_SEGS;
 
 	err = register_netdev(dev);

From da191fac4bfa7ba6372e10365591a2759a3298ea Mon Sep 17 00:00:00 2001
From: Vinayak Menon <vinmenon@codeaurora.org>
Date: Mon, 22 Feb 2016 19:15:44 +0530
Subject: [PATCH 233/797] of: alloc anywhere from memblock if range not
 specified

commit e53b50c0cbe392c946807abf7d07615a3c588642 upstream.

early_init_dt_alloc_reserved_memory_arch passes end as 0 to
__memblock_alloc_base, when limits are not specified. But
__memblock_alloc_base treats an end value of 0 as MEMBLOCK_ALLOC_ACCESSIBLE
and limits the end to memblock.current_limit. This results in regions
(e.g. CMA) never being placed in the HIGHMEM area.
Let __memblock_alloc_base allocate from anywhere in memory if limits are
not specified.

Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/of_reserved_mem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 1a3556a9e9ea..ed01c0172e4a 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -32,11 +32,13 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
 	phys_addr_t *res_base)
 {
+	phys_addr_t base;
 	/*
 	 * We use __memblock_alloc_base() because memblock_alloc_base()
 	 * panic()s on allocation failure.
 	 */
-	phys_addr_t base = __memblock_alloc_base(size, align, end);
+	end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
+	base = __memblock_alloc_base(size, align, end);
 	if (!base)
 		return -ENOMEM;
 

From 994f9db39113a268394fcba06537cabfbb40cb2b Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Thu, 19 Mar 2015 11:10:54 +0000
Subject: [PATCH 234/797] vfs: show_vfsstat: do not ignore errors from
 show_devname method

commit 5f8d498d4364f544fee17125787a47553db02afa upstream.

Explicitly check show_devname method return code and bail out in case
of an error.  This fixes regression introduced by commit 9d4d65748a5c.

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc_namespace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 8ebd9a334085..87645955990d 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -197,6 +197,8 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 	if (sb->s_op->show_devname) {
 		seq_puts(m, "device ");
 		err = sb->s_op->show_devname(m, mnt_path.dentry);
+		if (err)
+			goto out;
 	} else {
 		if (r->mnt_devname) {
 			seq_puts(m, "device ");

From ab14444f6f3dd3cd0a47ad4bcc35fed256d1e9a7 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Thu, 10 Mar 2016 21:19:06 +0100
Subject: [PATCH 235/797] splice: handle zero nr_pages in splice_to_pipe()

commit d6785d9152147596f60234157da2b02540c3e60f upstream.

Running the following command:

 busybox cat /sys/kernel/debug/tracing/trace_pipe > /dev/null

with any tracing enabled pretty quickly leads to various NULL
pointer dereferences and VM BUG_ON()s, such as these:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
 IP: [<ffffffff8119df6c>] generic_pipe_buf_release+0xc/0x40
 Call Trace:
  [<ffffffff811c48a3>] splice_direct_to_actor+0x143/0x1e0
  [<ffffffff811c42e0>] ? generic_pipe_buf_nosteal+0x10/0x10
  [<ffffffff811c49cf>] do_splice_direct+0x8f/0xb0
  [<ffffffff81196869>] do_sendfile+0x199/0x380
  [<ffffffff81197600>] SyS_sendfile64+0x90/0xa0
  [<ffffffff8192cbee>] entry_SYSCALL_64_fastpath+0x12/0x6d

 page dumped because: VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0)
 kernel BUG at include/linux/mm.h:367!
 invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
 RIP: [<ffffffff8119df9c>] generic_pipe_buf_release+0x3c/0x40
 Call Trace:
  [<ffffffff811c48a3>] splice_direct_to_actor+0x143/0x1e0
  [<ffffffff811c42e0>] ? generic_pipe_buf_nosteal+0x10/0x10
  [<ffffffff811c49cf>] do_splice_direct+0x8f/0xb0
  [<ffffffff81196869>] do_sendfile+0x199/0x380
  [<ffffffff81197600>] SyS_sendfile64+0x90/0xa0
  [<ffffffff8192cd1e>] tracesys_phase2+0x84/0x89

(busybox's cat uses sendfile(2), unlike the coreutils version)

This is because tracing_splice_read_pipe() can call splice_to_pipe()
with spd->nr_pages == 0.  spd_pages underflows in splice_to_pipe() and
we fill the page pointers and the other fields of the pipe_buffers with
garbage.

All other callers of splice_to_pipe() avoid calling it when nr_pages ==
0, and we could make tracing_splice_read_pipe() do that too, but it
seems reasonable to have splice_to_pipe() handle this condition
gracefully.

Signed-off-by: Rabin Vincent <rabin@rab.in>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/splice.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/splice.c b/fs/splice.c
index 4cf700d50b40..0f77e9682857 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -185,6 +185,9 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 	unsigned int spd_pages = spd->nr_pages;
 	int ret, do_wakeup, page_nr;
 
+	if (!spd_pages)
+		return 0;
+
 	ret = 0;
 	do_wakeup = 0;
 	page_nr = 0;

From f2e1e0a0afd3549c87c97ec2b3363b7325c6e3d5 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 9 Feb 2016 01:02:38 +0300
Subject: [PATCH 236/797] xtensa: ISS: don't hang if stdin EOF is reached

commit 362014c8d9d51d504c167c44ac280169457732be upstream.

Simulator stdin may be connected to a file; when its end is reached the
kernel hangs in an infinite loop inside rs_poll, because simc_poll always
signals that descriptor 0 is readable and simc_read always returns 0.
Check the simc_read return value and exit the loop if it's not positive.
Also don't rewind the polling timer if simc_read returned zero.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/platforms/iss/console.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 70cb408bc20d..92d785fefb6d 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -100,21 +100,23 @@ static void rs_poll(unsigned long priv)
 {
 	struct tty_port *port = (struct tty_port *)priv;
 	int i = 0;
+	int rd = 1;
 	unsigned char c;
 
 	spin_lock(&timer_lock);
 
 	while (simc_poll(0)) {
-		simc_read(0, &c, 1);
+		rd = simc_read(0, &c, 1);
+		if (rd <= 0)
+			break;
 		tty_insert_flip_char(port, c, TTY_NORMAL);
 		i++;
 	}
 
 	if (i)
 		tty_flip_buffer_push(port);
-
-
-	mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
+	if (rd)
+		mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
 	spin_unlock(&timer_lock);
 }
 

From cd8af682db6a51a5130f48b07d211c808f321997 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Thu, 25 Feb 2016 23:27:51 +0300
Subject: [PATCH 237/797] xtensa: fix preemption in {clear,copy}_user_highpage

commit a67cc9aa2dfc6e66addf240bbd79e16e01565e81 upstream.

Disabling pagefault makes little sense there, preemption disabling is
what was meant.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/mm/cache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index d75aa1476da7..1a804a2f9a5b 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -97,11 +97,11 @@ void clear_user_highpage(struct page *page, unsigned long vaddr)
 	unsigned long paddr;
 	void *kvaddr = coherent_kvaddr(page, TLBTEMP_BASE_1, vaddr, &paddr);
 
-	pagefault_disable();
+	preempt_disable();
 	kmap_invalidate_coherent(page, vaddr);
 	set_bit(PG_arch_1, &page->flags);
 	clear_page_alias(kvaddr, paddr);
-	pagefault_enable();
+	preempt_enable();
 }
 
 void copy_user_highpage(struct page *dst, struct page *src,
@@ -113,11 +113,11 @@ void copy_user_highpage(struct page *dst, struct page *src,
 	void *src_vaddr = coherent_kvaddr(src, TLBTEMP_BASE_2, vaddr,
 					  &src_paddr);
 
-	pagefault_disable();
+	preempt_disable();
 	kmap_invalidate_coherent(dst, vaddr);
 	set_bit(PG_arch_1, &dst->flags);
 	copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr);
-	pagefault_enable();
+	preempt_enable();
 }
 
 #endif /* DCACHE_WAY_SIZE > PAGE_SIZE */

From 1f841628aca5d4133ad2aba0e2f279c8ab65b9d8 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Thu, 3 Mar 2016 18:34:29 +0300
Subject: [PATCH 238/797] xtensa: clear all DBREAKC registers on start

commit 7de7ac785ae18a2cdc78d7560f48e3213d9ea0ab upstream.

There are XCHAL_NUM_DBREAK registers, clear them all.
This also fixes cryptic assembler error message with binutils 2.25 when
XCHAL_NUM_DBREAK is 0:

  as: out of memory allocating 18446744073709551575 bytes after a total
  of 495616 bytes

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/kernel/head.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 9ed55649ac8e..05e1df943856 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -128,7 +128,7 @@ ENTRY(_startup)
 	wsr	a0, icountlevel
 
 	.set	_index, 0
-	.rept	XCHAL_NUM_DBREAK - 1
+	.rept	XCHAL_NUM_DBREAK
 	wsr	a0, SREG_DBREAKC + _index
 	.set	_index, _index + 1
 	.endr

From f640dae8943ecbcc9ee6710aec89bba594512336 Mon Sep 17 00:00:00 2001
From: Lada Trimasova <ltrimas@synopsys.com>
Date: Wed, 9 Mar 2016 20:21:04 +0300
Subject: [PATCH 239/797] ARC: [BE] readl()/writel() to work in Big Endian CPU
 configuration

commit f778cc65717687a3d3f26dd21bef62cd059f1b8b upstream.

read{l,w}() write{l,w}() primitives should use le{16,32}_to_cpu() and
cpu_to_le{16,32}() respectively to ensure device registers are read
correctly in Big Endian CPU configuration.

Per Arnd Bergmann
| Most drivers using readl() or readl_relaxed() expect those to perform byte
| swaps on big-endian architectures, as the registers tend to be fixed endian

This was needed for getting UART to work correctly on a Big Endian ARC.

The ARC accessors originally were fine, and the bug got introduced
inadvertently by commit b8a033023994 ("ARCv2: barriers")

Fixes: b8a033023994 ("ARCv2: barriers")
Link: http://lkml.kernel.org/r/201603100845.30602.arnd@arndb.de
Cc: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Lada Trimasova <ltrimas@synopsys.com>
[vgupta: beefed up changelog, added Fixes/stable tags]
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/io.h | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 694ece8a0243..27b17adea50d 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -129,15 +129,23 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 #define writel(v,c)		({ __iowmb(); writel_relaxed(v,c); })
 
 /*
- * Relaxed API for drivers which can handle any ordering themselves
+ * Relaxed API for drivers which can handle barrier ordering themselves
+ *
+ * Also these are defined to perform little endian accesses.
+ * To provide the typical device register semantics of fixed endian,
+ * swap the byte order for Big Endian
+ *
+ * http://lkml.kernel.org/r/201603100845.30602.arnd@arndb.de
  */
 #define readb_relaxed(c)	__raw_readb(c)
-#define readw_relaxed(c)	__raw_readw(c)
-#define readl_relaxed(c)	__raw_readl(c)
+#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16) \
+					__raw_readw(c)); __r; })
+#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \
+					__raw_readl(c)); __r; })
 
 #define writeb_relaxed(v,c)	__raw_writeb(v,c)
-#define writew_relaxed(v,c)	__raw_writew(v,c)
-#define writel_relaxed(v,c)	__raw_writel(v,c)
+#define writew_relaxed(v,c)	__raw_writew((__force u16) cpu_to_le16(v),c)
+#define writel_relaxed(v,c)	__raw_writel((__force u32) cpu_to_le32(v),c)
 
 #include <asm-generic/io.h>
 

From f3c5b82c36e98876ab507d3bc062100eecaba158 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 8 Mar 2016 19:31:24 +0530
Subject: [PATCH 240/797] ARC: bitops: Remove non relevant comments

commit 2a41b6dc28dc71c1a3f1622612a26edc58f7561e upstream.

commit 80f420842ff42 removed the ARC bitops microoptimization but failed
to prune the comments to same effect

Fixes: 80f420842ff42 ("ARC: Make ARC bitops "safer" (add anti-optimization)")
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/bitops.h | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 57c1f33844d4..0352fb8d21b9 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -35,21 +35,6 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
 									\
 	m += nr >> 5;							\
 									\
-	/*								\
-	 * ARC ISA micro-optimization:					\
-	 *								\
-	 * Instructions dealing with bitpos only consider lower 5 bits	\
-	 * e.g (x << 33) is handled like (x << 1) by ASL instruction	\
-	 *  (mem pointer still needs adjustment to point to next word)	\
-	 *								\
-	 * Hence the masking to clamp @nr arg can be elided in general.	\
-	 *								\
-	 * However if @nr is a constant (above assumed in a register),	\
-	 * and greater than 31, gcc can optimize away (x << 33) to 0,	\
-	 * as overflow, given the 32-bit ISA. Thus masking needs to be	\
-	 * done for const @nr, but no code is generated due to gcc	\
-	 * const prop.							\
-	 */								\
 	nr &= 0x1f;							\
 									\
 	__asm__ __volatile__(						\

From 6b3ae95f03d9ff3877549e601c88f7650e3ada29 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <kernel@kyup.com>
Date: Thu, 3 Mar 2016 10:54:57 +0100
Subject: [PATCH 241/797] quota: Fix possible GPF due to uninitialised pointers

commit ab73ef46398e2c0159f3a71de834586422d2a44a upstream.

When dqget() in __dquot_initialize() fails e.g. due to IO error,
__dquot_initialize() will pass an array of uninitialized pointers to
dqput_all() and thus can lead to dereference of random data. Fix the
problem by properly initializing the array.

Signed-off-by: Nikolay Borisov <kernel@kyup.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/quota/dquot.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ef0d64b2a6d9..353ff31dcee1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1398,7 +1398,7 @@ static int dquot_active(const struct inode *inode)
 static int __dquot_initialize(struct inode *inode, int type)
 {
 	int cnt, init_needed = 0;
-	struct dquot **dquots, *got[MAXQUOTAS];
+	struct dquot **dquots, *got[MAXQUOTAS] = {};
 	struct super_block *sb = inode->i_sb;
 	qsize_t rsv;
 	int ret = 0;
@@ -1415,7 +1415,6 @@ static int __dquot_initialize(struct inode *inode, int type)
 		int rc;
 		struct dquot *dquot;
 
-		got[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
 		/*

From b34291f71d0fb3d09728fd43d6da350ff6a179d6 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mguzik@redhat.com>
Date: Wed, 2 Mar 2016 09:51:09 +1100
Subject: [PATCH 242/797] xfs: fix two memory leaks in xfs_attr_list.c error
 paths

commit 2e83b79b2d6c78bf1b4aa227938a214dcbddc83f upstream.

This plugs 2 trivial leaks in xfs_attr_shortform_list and
xfs_attr3_leaf_list_int.

Signed-off-by: Mateusz Guzik <mguzik@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_attr_list.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 0ef7c2ed3f8a..4fa14820e2e2 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -202,8 +202,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 					sbp->namelen,
 					sbp->valuelen,
 					&sbp->name[sbp->namelen]);
-		if (error)
+		if (error) {
+			kmem_free(sbuf);
 			return error;
+		}
 		if (context->seen_enough)
 			break;
 		cursor->offset++;
@@ -454,14 +456,13 @@ xfs_attr3_leaf_list_int(
 				args.rmtblkcnt = xfs_attr3_rmt_blocks(
 							args.dp->i_mount, valuelen);
 				retval = xfs_attr_rmtval_get(&args);
-				if (retval)
-					return retval;
-				retval = context->put_listent(context,
-						entry->flags,
-						name_rmt->name,
-						(int)name_rmt->namelen,
-						valuelen,
-						args.value);
+				if (!retval)
+					retval = context->put_listent(context,
+							entry->flags,
+							name_rmt->name,
+							(int)name_rmt->namelen,
+							valuelen,
+							args.value);
 				kmem_free(args.value);
 			} else {
 				retval = context->put_listent(context,

From 2b9eb2b2234c44c509384549a023c3f3bde5c59a Mon Sep 17 00:00:00 2001
From: Nate Dailey <nate.dailey@stratus.com>
Date: Mon, 29 Feb 2016 10:43:58 -0500
Subject: [PATCH 243/797] raid1: include bio_end_io_list in nr_queued to
 prevent freeze_array hang

commit ccfc7bf1f09d6190ef86693ddc761d5fe3fa47cb upstream.

If raid1d is handling a mix of read and write errors, handle_read_error's
call to freeze_array can get stuck.

This can happen because, though the bio_end_io_list is initially drained,
writes can be added to it via handle_write_finished as the retry_list
is processed. These writes contribute to nr_pending but are not included
in nr_queued.

If a later entry on the retry_list triggers a call to handle_read_error,
freeze_array hangs waiting for nr_pending == nr_queued+extra. The writes
on the bio_end_io_list aren't included in nr_queued so the condition will
never be satisfied.

To prevent the hang, include bio_end_io_list writes in nr_queued.

There's probably a better way to handle decrementing nr_queued, but this
seemed like the safest way to avoid breaking surrounding code.

I'm happy to supply the script I used to repro this hang.

Fixes: 55ce74d4bfe1b(md/raid1: ensure device failure recorded before write request returns.)
Signed-off-by: Nate Dailey <nate.dailey@stratus.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid1.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c4b913409226..515554c7365b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 	if (fail) {
 		spin_lock_irq(&conf->device_lock);
 		list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
+		conf->nr_queued++;
 		spin_unlock_irq(&conf->device_lock);
 		md_wakeup_thread(conf->mddev->thread);
 	} else {
@@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread)
 		LIST_HEAD(tmp);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-			list_add(&tmp, &conf->bio_end_io_list);
-			list_del_init(&conf->bio_end_io_list);
+			while (!list_empty(&conf->bio_end_io_list)) {
+				list_move(conf->bio_end_io_list.prev, &tmp);
+				conf->nr_queued--;
+			}
 		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		while (!list_empty(&tmp)) {

From fad8b6fc040bf1d9807ee0e3358275d06b14cf96 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <Jes.Sorensen@redhat.com>
Date: Tue, 16 Feb 2016 16:44:24 -0500
Subject: [PATCH 244/797] md/raid5: Compare apples to apples (or sectors to
 sectors)

commit e7597e69dec59b65c5525db1626b9d34afdfa678 upstream.

'max_discard_sectors' is in sectors, while 'stripe' is in bytes.

This fixes the problem where DISCARD would get disabled on some larger
RAID5 configurations (6 or more drives in my testing), while it worked
as expected with smaller configurations.

Fixes: 620125f2bf8 ("MD: raid5 trim support")
Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 704ef7fcfbf8..c13921adec93 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7015,8 +7015,8 @@ static int run(struct mddev *mddev)
 		}
 
 		if (discard_supported &&
-		   mddev->queue->limits.max_discard_sectors >= stripe &&
-		   mddev->queue->limits.discard_granularity >= stripe)
+		    mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
+		    mddev->queue->limits.discard_granularity >= stripe)
 			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
 						mddev->queue);
 		else

From 8568767fe1d63fbbdeacf2f69da6f84ad277d4f7 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 24 Feb 2016 17:38:28 -0800
Subject: [PATCH 245/797] RAID5: check_reshape() shouldn't call mddev_suspend

commit 27a353c026a879a1001e5eac4bda75b16262c44a upstream.

check_reshape() is called from raid5d thread. raid5d thread shouldn't
call mddev_suspend(), because mddev_suspend() waits for all IO to finish
but IO is handled in raid5d thread, so we could easily deadlock here.

This issue is introduced by
738a273 ("md/raid5: fix allocation of 'scribble' array.")

Reported-and-tested-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 18 ++++++++++++++++++
 drivers/md/raid5.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c13921adec93..d3e747cd0b34 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2091,6 +2091,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	unsigned long cpu;
 	int err = 0;
 
+	/*
+	 * Never shrink. And mddev_suspend() could deadlock if this is called
+	 * from raid5d. In that case, scribble_disks and scribble_sectors
+	 * should equal to new_disks and new_sectors
+	 */
+	if (conf->scribble_disks >= new_disks &&
+	    conf->scribble_sectors >= new_sectors)
+		return 0;
 	mddev_suspend(conf->mddev);
 	get_online_cpus();
 	for_each_present_cpu(cpu) {
@@ -2112,6 +2120,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	}
 	put_online_cpus();
 	mddev_resume(conf->mddev);
+	if (!err) {
+		conf->scribble_disks = new_disks;
+		conf->scribble_sectors = new_sectors;
+	}
 	return err;
 }
 
@@ -6414,6 +6426,12 @@ static int raid5_alloc_percpu(struct r5conf *conf)
 	}
 	put_online_cpus();
 
+	if (!err) {
+		conf->scribble_disks = max(conf->raid_disks,
+			conf->previous_raid_disks);
+		conf->scribble_sectors = max(conf->chunk_sectors,
+			conf->prev_chunk_sectors);
+	}
 	return err;
 }
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a415e1cd39b8..ae6068deefdf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -510,6 +510,8 @@ struct r5conf {
 					      * conversions
 					      */
 	} __percpu *percpu;
+	int scribble_disks;
+	int scribble_sectors;
 #ifdef CONFIG_HOTPLUG_CPU
 	struct notifier_block	cpu_notify;
 #endif

From ad072f606586a70eab8fd168465a6df4c695a9e9 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 25 Feb 2016 16:24:42 -0800
Subject: [PATCH 246/797] RAID5: revert e9e4c377e2f563 to fix a livelock

commit 6ab2a4b806ae21b6c3e47c5ff1285ec06d505325 upstream.

Revert commit
e9e4c377e2f563(md/raid5: per hash value and exclusive wait_for_stripe)

The problem is raid5_get_active_stripe waits on
conf->wait_for_stripe[hash]. Assume hash is 0. My test releases stripes
in this order:
- release all stripes with hash 0
- raid5_get_active_stripe still sleeps since active_stripes >
  max_nr_stripes * 3 / 4
- release all stripes with hash other than 0. active_stripes becomes 0
- raid5_get_active_stripe still sleeps, since nobody wakes up
  wait_for_stripe[0]
The system live locks. The problem is active_stripes isn't a per-hash
count. Reverting the patch makes the live lock go away.

Cc: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 27 ++++++++-------------------
 drivers/md/raid5.h |  2 +-
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d3e747cd0b34..22bac0188af6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 					 int hash)
 {
 	int size;
-	unsigned long do_wakeup = 0;
-	int i = 0;
+	bool do_wakeup = false;
 	unsigned long flags;
 
 	if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 			    !list_empty(list))
 				atomic_dec(&conf->empty_inactive_list_nr);
 			list_splice_tail_init(list, conf->inactive_list + hash);
-			do_wakeup |= 1 << hash;
+			do_wakeup = true;
 			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
 		}
 		size--;
 		hash--;
 	}
 
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		if (do_wakeup & (1 << i))
-			wake_up(&conf->wait_for_stripe[i]);
-	}
-
 	if (do_wakeup) {
+		wake_up(&conf->wait_for_stripe);
 		if (atomic_read(&conf->active_stripes) == 0)
 			wake_up(&conf->wait_for_quiescent);
 		if (conf->retry_read_aligned)
@@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (!sh) {
 				set_bit(R5_INACTIVE_BLOCKED,
 					&conf->cache_state);
-				wait_event_exclusive_cmd(
-					conf->wait_for_stripe[hash],
+				wait_event_lock_irq(
+					conf->wait_for_stripe,
 					!list_empty(conf->inactive_list + hash) &&
 					(atomic_read(&conf->active_stripes)
 					 < (conf->max_nr_stripes * 3 / 4)
 					 || !test_bit(R5_INACTIVE_BLOCKED,
 						      &conf->cache_state)),
-					spin_unlock_irq(conf->hash_locks + hash),
-					spin_lock_irq(conf->hash_locks + hash));
+					*(conf->hash_locks + hash));
 				clear_bit(R5_INACTIVE_BLOCKED,
 					  &conf->cache_state);
 			} else {
@@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 		}
 	} while (sh == NULL);
 
-	if (!list_empty(conf->inactive_list + hash))
-		wake_up(&conf->wait_for_stripe[hash]);
-
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
@@ -2204,7 +2195,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
 		lock_device_hash_lock(conf, hash);
-		wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
+		wait_event_cmd(conf->wait_for_stripe,
 				    !list_empty(conf->inactive_list + hash),
 				    unlock_device_hash_lock(conf, hash),
 				    lock_device_hash_lock(conf, hash));
@@ -6522,9 +6513,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	seqcount_init(&conf->gen_lock);
 	mutex_init(&conf->cache_size_mutex);
 	init_waitqueue_head(&conf->wait_for_quiescent);
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		init_waitqueue_head(&conf->wait_for_stripe[i]);
-	}
+	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->hold_list);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index ae6068deefdf..517d4b68a1be 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -524,7 +524,7 @@ struct r5conf {
 	atomic_t		empty_inactive_list_nr;
 	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_quiescent;
-	wait_queue_head_t	wait_for_stripe[NR_STRIPE_HASH_LOCKS];
+	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
 	unsigned long		cache_state;
 #define R5_INACTIVE_BLOCKED	1	/* release of inactive stripes blocked,

From b82ed7dc00f3472d4a7bca472e05ed9744881bc8 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Mon, 14 Mar 2016 11:49:32 -0700
Subject: [PATCH 247/797] raid10: include bio_end_io_list in nr_queued to
 prevent freeze_array hang

commit 23ddba80ebe836476bb2fa1f5ef305dd1c63dc0b upstream.

This is the raid10 counterpart of the bug fixed by Nate
(raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang)

Fixes: 95af587e95(md/raid10: ensure device failure recorded before write request returns)
Cc: Nate Dailey <nate.dailey@stratus.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid10.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ce959b4ae4df..ebb0dd612ebd 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 		if (fail) {
 			spin_lock_irq(&conf->device_lock);
 			list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
+			conf->nr_queued++;
 			spin_unlock_irq(&conf->device_lock);
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
@@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread)
 		LIST_HEAD(tmp);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-			list_add(&tmp, &conf->bio_end_io_list);
-			list_del_init(&conf->bio_end_io_list);
+			while (!list_empty(&conf->bio_end_io_list)) {
+				list_move(conf->bio_end_io_list.prev, &tmp);
+				conf->nr_queued--;
+			}
 		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		while (!list_empty(&tmp)) {

From d5e30f2b934bf9964e1bf5fb1c1bd4d1bc865b5d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 9 Mar 2016 12:58:25 +1100
Subject: [PATCH 248/797] md/raid5: preserve STRIPE_PREREAD_ACTIVE in
 break_stripe_batch_list

commit 550da24f8d62fe81f3c13e3ec27602d6e44d43dc upstream.

break_stripe_batch_list breaks up a batch and copies some flags from
the batch head to the members, preserving others.

It doesn't preserve or copy STRIPE_PREREAD_ACTIVE.  This is not
normally a problem as STRIPE_PREREAD_ACTIVE is cleared when a
stripe_head is added to a batch, and is not set on stripe_heads
already in a batch.

However there is no locking to ensure one thread doesn't set the flag
after it has just been cleared in another.  This does occasionally happen.

md/raid5 maintains a count of the number of stripe_heads with
STRIPE_PREREAD_ACTIVE set: conf->preread_active_stripes.  When
break_stripe_batch_list clears STRIPE_PREREAD_ACTIVE inadvertently
this count could become incorrect and will never again return to zero.

md/raid5 delays the handling of some stripe_heads until
preread_active_stripes becomes zero.  So when the above-mentioned race
happens, those stripe_heads become blocked and never progress,
resulting in writes to the array hanging.

So: change break_stripe_batch_list to preserve STRIPE_PREREAD_ACTIVE
in the members of a batch.

URL: https://bugzilla.kernel.org/show_bug.cgi?id=108741
URL: https://bugzilla.redhat.com/show_bug.cgi?id=1258153
URL: http://thread.gmane.org/5649C0E9.2030204@zoner.cz
Reported-by: Martin Svec <martin.svec@zoner.cz> (and others)
Tested-by: Tom Weber <linux@junkyard.4t2.com>
Fixes: 1b956f7a8f9a ("md/raid5: be more selective about distributing flags across batch.")
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 22bac0188af6..10ce885445f6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4241,7 +4241,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 		WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
 					  (1 << STRIPE_SYNCING) |
 					  (1 << STRIPE_REPLACED) |
-					  (1 << STRIPE_PREREAD_ACTIVE) |
 					  (1 << STRIPE_DELAYED) |
 					  (1 << STRIPE_BIT_DELAY) |
 					  (1 << STRIPE_FULL_WRITE) |
@@ -4256,6 +4255,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 					      (1 << STRIPE_REPLACED)));
 
 		set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+					    (1 << STRIPE_PREREAD_ACTIVE) |
 					    (1 << STRIPE_DEGRADED)),
 			      head_sh->state & (1 << STRIPE_INSYNC));
 

From 2775a60447ae12350711b443be1083117b6f13b8 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Sat, 12 Mar 2016 09:29:40 +0800
Subject: [PATCH 249/797] md: multipath: don't hardcopy bio in .make_request
 path

commit fafcde3ac1a418688a734365203a12483b83907a upstream.

Inside multipath_make_request(), multipath maps the incoming
bio into low level device's bio, but it is totally wrong to
copy the bio into mapped bio via '*mapped_bio = *bio'. For
example, .__bi_remaining is kept in the copy, especially if
the incoming bio is chained to via bio splitting, so .bi_end_io
can't be called for the mapped bio at all in the completing path
in this kind of situation.

This patch fixes the issue by using clone style.

Reported-and-tested-by: Andrea Righi <righi.andrea@gmail.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/multipath.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 0a72ab6e6c20..dd483bb2e111 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
 
-	mp_bh->bio = *bio;
+	bio_init(&mp_bh->bio);
+	__bio_clone_fast(&mp_bh->bio, bio);
+
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
 	mp_bh->bio.bi_bdev = multipath->rdev->bdev;
 	mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;

From 32b9807433507058fce25092a0d11abac494d00d Mon Sep 17 00:00:00 2001
From: Robert Doebbelin <robert@quobyte.com>
Date: Mon, 7 Mar 2016 09:50:56 +0100
Subject: [PATCH 250/797] fuse: do not use iocb after it may have been freed

commit 7cabc61e01a0a8b663bd2b4c982aa53048218734 upstream.

There's a race in fuse_direct_IO(), whereby is_sync_kiocb() is called on an
iocb that could have been freed if async io has already completed.  The fix
in this case is simple and obvious: cache the result before starting io.

It was discovered by KASan:

kernel: ==================================================================
kernel: BUG: KASan: use after free in fuse_direct_IO+0xb1a/0xcc0 at addr ffff88036c414390

Signed-off-by: Robert Doebbelin <robert@quobyte.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: bcba24ccdc82 ("fuse: enable asynchronous processing direct IO")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/file.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 570ca4053c80..6991b5cc0056 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2786,6 +2786,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	loff_t i_size;
 	size_t count = iov_iter_count(iter);
 	struct fuse_io_priv *io;
+	bool is_sync = is_sync_kiocb(iocb);
 
 	pos = offset;
 	inode = file->f_mapping->host;
@@ -2825,11 +2826,11 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	 * to wait on real async I/O requests, so we must submit this request
 	 * synchronously.
 	 */
-	if (!is_sync_kiocb(iocb) && (offset + count > i_size) &&
+	if (!is_sync && (offset + count > i_size) &&
 	    iov_iter_rw(iter) == WRITE)
 		io->async = false;
 
-	if (io->async && is_sync_kiocb(iocb))
+	if (io->async && is_sync)
 		io->done = &wait;
 
 	if (iov_iter_rw(iter) == WRITE) {
@@ -2843,7 +2844,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
 
 		/* we have a non-extending, async request, so return */
-		if (!is_sync_kiocb(iocb))
+		if (!is_sync)
 			return -EIOCBQUEUED;
 
 		wait_for_completion(&wait);

From 37bd8c883ea5f3b90ae1788a1f76c93cb0dbeba5 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Fri, 11 Mar 2016 10:35:34 -0600
Subject: [PATCH 251/797] fuse: Add reference counting for fuse_io_priv

commit 744742d692e37ad5c20630e57d526c8f2e2fe3c9 upstream.

The 'reqs' member of fuse_io_priv serves two purposes. First is to track
the number of outstanding async requests to the server and to signal that
the io request is completed. The second is to be a reference count on the
structure to know when it can be freed.

For sync io requests these purposes can be at odds.  fuse_direct_IO() wants
to block until the request is done, and since the signal is sent when
'reqs' reaches 0 it cannot keep a reference to the object. Yet it needs to
use the object after the userspace server has completed processing
requests. This leads to some handshaking and special casing that is
needlessly complicated and responsible for at least one race condition.

It's much cleaner and safer to maintain a separate reference count for the
object lifecycle and to let 'reqs' just be a count of outstanding requests
to the userspace server. Then we can know for sure when it is safe to free
the object without any handshaking or special cases.

The catch here is that most of the time these objects are stack allocated
and should not be freed. Initializing these objects with a single reference
that is never released prevents accidental attempts to free the objects.

Fixes: 9d5722b7777e ("fuse: handle synchronous iocbs internally")
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/cuse.c   |  4 ++--
 fs/fuse/file.c   | 28 +++++++++++++++++++++-------
 fs/fuse/fuse_i.h |  9 +++++++++
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 8e3ee1936c7e..c5b6b7165489 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -90,7 +90,7 @@ static struct list_head *cuse_conntbl_head(dev_t devt)
 
 static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 {
-	struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp);
 	loff_t pos = 0;
 
 	return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
@@ -98,7 +98,7 @@ static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 
 static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 {
-	struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp);
 	loff_t pos = 0;
 	/*
 	 * No locking or generic_write_checks(), the server is
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6991b5cc0056..c2e340d6ec6e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -528,6 +528,11 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
 	}
 }
 
+static void fuse_io_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct fuse_io_priv, refcnt));
+}
+
 static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
 {
 	if (io->err)
@@ -585,8 +590,9 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 		}
 
 		io->iocb->ki_complete(io->iocb, res, 0);
-		kfree(io);
 	}
+
+	kref_put(&io->refcnt, fuse_io_release);
 }
 
 static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
@@ -613,6 +619,7 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
 		size_t num_bytes, struct fuse_io_priv *io)
 {
 	spin_lock(&io->lock);
+	kref_get(&io->refcnt);
 	io->size += num_bytes;
 	io->reqs++;
 	spin_unlock(&io->lock);
@@ -691,7 +698,7 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
 
 static int fuse_do_readpage(struct file *file, struct page *page)
 {
-	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
@@ -984,7 +991,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
 	size_t res;
 	unsigned offset;
 	unsigned i;
-	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
 
 	for (i = 0; i < req->num_pages; i++)
 		fuse_wait_on_page_writeback(inode, req->pages[i]->index);
@@ -1398,7 +1405,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
 
 static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb->ki_filp);
 	return __fuse_direct_read(&io, to, &iocb->ki_pos);
 }
 
@@ -1406,7 +1413,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
-	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
 	ssize_t res;
 
 	if (is_bad_inode(inode))
@@ -2807,6 +2814,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	if (!io)
 		return -ENOMEM;
 	spin_lock_init(&io->lock);
+	kref_init(&io->refcnt);
 	io->reqs = 1;
 	io->bytes = -1;
 	io->size = 0;
@@ -2830,8 +2838,14 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	    iov_iter_rw(iter) == WRITE)
 		io->async = false;
 
-	if (io->async && is_sync)
+	if (io->async && is_sync) {
+		/*
+		 * Additional reference to keep io around after
+		 * calling fuse_aio_complete()
+		 */
+		kref_get(&io->refcnt);
 		io->done = &wait;
+	}
 
 	if (iov_iter_rw(iter) == WRITE) {
 		ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
@@ -2851,7 +2865,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 		ret = fuse_get_res_by_io(io);
 	}
 
-	kfree(io);
+	kref_put(&io->refcnt, fuse_io_release);
 
 	if (iov_iter_rw(iter) == WRITE) {
 		if (ret > 0)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 405113101db8..604cd42dafef 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -22,6 +22,7 @@
 #include <linux/rbtree.h>
 #include <linux/poll.h>
 #include <linux/workqueue.h>
+#include <linux/kref.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -243,6 +244,7 @@ struct fuse_args {
 
 /** The request IO state (for asynchronous processing) */
 struct fuse_io_priv {
+	struct kref refcnt;
 	int async;
 	spinlock_t lock;
 	unsigned reqs;
@@ -256,6 +258,13 @@ struct fuse_io_priv {
 	struct completion *done;
 };
 
+#define FUSE_IO_PRIV_SYNC(f) \
+{					\
+	.refcnt = { ATOMIC_INIT(1) },	\
+	.async = 0,			\
+	.file = f,			\
+}
+
 /**
  * Request flags
  *

From 74b23f79f16802d01315db8b028518ef0abd7bc8 Mon Sep 17 00:00:00 2001
From: Jann Horn <jann@thejh.net>
Date: Tue, 22 Mar 2016 14:25:36 -0700
Subject: [PATCH 252/797] fs/coredump: prevent fsuid=0 dumps into
 user-controlled directories

commit 378c6520e7d29280f400ef2ceaf155c86f05a71a upstream.

This commit fixes the following security hole affecting systems where
all of the following conditions are fulfilled:

 - The fs.suid_dumpable sysctl is set to 2.
 - The kernel.core_pattern sysctl's value starts with "/". (Systems
   where kernel.core_pattern starts with "|/" are not affected.)
 - Unprivileged user namespace creation is permitted. (This is
   true on Linux >=3.8, but some distributions disallow it by
   default using a distro patch.)

Under these conditions, if a program executes under secure exec rules,
causing it to run with the SUID_DUMP_ROOT flag, then unshares its user
namespace, changes its root directory and crashes, the coredump will be
written using fsuid=0 and a path derived from kernel.core_pattern - but
this path is interpreted relative to the root directory of the process,
allowing the attacker to control where a coredump will be written with
root privileges.

To fix the security issue, always interpret core_pattern for dumps that
are written under SUID_DUMP_ROOT relative to the root directory of init.

Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/um/drivers/mconsole_kern.c |  2 +-
 fs/coredump.c                   | 30 ++++++++++++++++++++++++++----
 fs/fhandle.c                    |  2 +-
 fs/open.c                       |  6 ++----
 include/linux/fs.h              |  2 +-
 kernel/sysctl_binary.c          |  2 +-
 6 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 29880c9b324e..e22e57298522 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -133,7 +133,7 @@ void mconsole_proc(struct mc_request *req)
 	ptr += strlen("proc");
 	ptr = skip_spaces(ptr);
 
-	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
+	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
 	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file));
diff --git a/fs/coredump.c b/fs/coredump.c
index 1777331eee76..dfc87c5f5a54 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -32,6 +32,9 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
 #include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -627,6 +630,8 @@ void do_coredump(const siginfo_t *siginfo)
 		}
 	} else {
 		struct inode *inode;
+		int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+				 O_LARGEFILE | O_EXCL;
 
 		if (cprm.limit < binfmt->min_coredump)
 			goto fail_unlock;
@@ -665,10 +670,27 @@ void do_coredump(const siginfo_t *siginfo)
 		 * what matters is that at least one of the two processes
 		 * writes its coredump successfully, not which one.
 		 */
-		cprm.file = filp_open(cn.corename,
-				 O_CREAT | 2 | O_NOFOLLOW |
-				 O_LARGEFILE | O_EXCL,
-				 0600);
+		if (need_suid_safe) {
+			/*
+			 * Using user namespaces, normal user tasks can change
+			 * their current->fs->root to point to arbitrary
+			 * directories. Since the intention of the "only dump
+			 * with a fully qualified path" rule is to control where
+			 * coredumps may be placed using root privileges,
+			 * current->fs->root must not be used. Instead, use the
+			 * root directory of init_task.
+			 */
+			struct path root;
+
+			task_lock(&init_task);
+			get_fs_root(init_task.fs, &root);
+			task_unlock(&init_task);
+			cprm.file = file_open_root(root.dentry, root.mnt,
+				cn.corename, open_flags, 0600);
+			path_put(&root);
+		} else {
+			cprm.file = filp_open(cn.corename, open_flags, 0600);
+		}
 		if (IS_ERR(cprm.file))
 			goto fail_unlock;
 
diff --git a/fs/fhandle.c b/fs/fhandle.c
index d59712dfa3e7..ca3c3dd01789 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -228,7 +228,7 @@ long do_handle_open(int mountdirfd,
 		path_put(&path);
 		return fd;
 	}
-	file = file_open_root(path.dentry, path.mnt, "", open_flag);
+	file = file_open_root(path.dentry, path.mnt, "", open_flag, 0);
 	if (IS_ERR(file)) {
 		put_unused_fd(fd);
 		retval =  PTR_ERR(file);
diff --git a/fs/open.c b/fs/open.c
index b6f1e96a7c0b..6a24f988d253 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -995,14 +995,12 @@ struct file *filp_open(const char *filename, int flags, umode_t mode)
 EXPORT_SYMBOL(filp_open);
 
 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
-			    const char *filename, int flags)
+			    const char *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
-	int err = build_open_flags(flags, 0, &op);
+	int err = build_open_flags(flags, mode, &op);
 	if (err)
 		return ERR_PTR(err);
-	if (flags & O_CREAT)
-		return ERR_PTR(-EINVAL);
 	return do_file_open_root(dentry, mnt, filename, &op);
 }
 EXPORT_SYMBOL(file_open_root);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..22c5a0cf16e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2217,7 +2217,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags,
 extern struct file *file_open_name(struct filename *, int, umode_t);
 extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
-				   const char *, int);
+				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 7e7746a42a62..10a1d7dc9313 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,7 +1321,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
 	}
 
 	mnt = task_active_pid_ns(current)->proc_mnt;
-	file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
+	file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0);
 	result = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_putname;

From 91b95d59aa38ca5a1b9835c108ae1aed533e5e76 Mon Sep 17 00:00:00 2001
From: Aurelien Jacquiot <a-jacquiot@ti.com>
Date: Tue, 22 Mar 2016 14:25:42 -0700
Subject: [PATCH 253/797] rapidio/rionet: fix deadlock on SMP

commit 36915976eca58f2eefa040ba8f9939672564df61 upstream.

Fix deadlocking during concurrent receive and transmit operations on SMP
platforms caused by the use of incorrect lock: on transmit 'tx_lock'
spinlock should be used instead of 'lock' which is used for receive
operation.

This fix is applicable to kernel versions starting from v2.15.

Signed-off-by: Aurelien Jacquiot <a-jacquiot@ti.com>
Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andre van Herk <andre.van.herk@prodrive-technologies.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/rionet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 01f08a7751f7..e7034c55e796 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -280,7 +280,7 @@ static void rionet_outb_msg_event(struct rio_mport *mport, void *dev_id, int mbo
 	struct net_device *ndev = dev_id;
 	struct rionet_private *rnet = netdev_priv(ndev);
 
-	spin_lock(&rnet->lock);
+	spin_lock(&rnet->tx_lock);
 
 	if (netif_msg_intr(rnet))
 		printk(KERN_INFO
@@ -299,7 +299,7 @@ static void rionet_outb_msg_event(struct rio_mport *mport, void *dev_id, int mbo
 	if (rnet->tx_cnt < RIONET_TX_RING_SIZE)
 		netif_wake_queue(ndev);
 
-	spin_unlock(&rnet->lock);
+	spin_unlock(&rnet->tx_lock);
 }
 
 static int rionet_open(struct net_device *ndev)

From a918d2bcea6aab6e671bfb0901cbecc3cf68fca1 Mon Sep 17 00:00:00 2001
From: Insu Yun <wuninsu@gmail.com>
Date: Wed, 6 Jan 2016 12:44:01 -0500
Subject: [PATCH 254/797] ipr: Fix out-of-bounds null overwrite

commit d63c7dd5bcb9441af0526d370c43a65ca2c980d9 upstream.

Return value of snprintf is not bound by size value, 2nd argument.
(https://www.kernel.org/doc/htmldocs/kernel-api/API-snprintf.html).
Return value is number of printed chars, can be larger than 2nd
argument.  Therefore, it can write a null byte out of bounds of the buffer.
Since snprintf puts null, it does not need to put additional null byte.

Signed-off-by: Insu Yun <wuninsu@gmail.com>
Reviewed-by: Shane Seymour <shane.seymour@hpe.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ipr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 536cd5a80422..1c3759bab80b 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4003,13 +4003,12 @@ static ssize_t ipr_store_update_fw(struct device *dev,
 	struct ipr_sglist *sglist;
 	char fname[100];
 	char *src;
-	int len, result, dnld_size;
+	int result, dnld_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	len = snprintf(fname, 99, "%s", buf);
-	fname[len-1] = '\0';
+	snprintf(fname, sizeof(fname), "%s", buf);
 
 	if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
 		dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);

From 6677a2ab036f28134b60ad4ed6fd2e72db579b8a Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Date: Thu, 25 Feb 2016 13:54:20 -0300
Subject: [PATCH 255/797] ipr: Fix regression when loading firmware

commit 21b81716c6bff24cda52dc75588455f879ddbfe9 upstream.

Commit d63c7dd5bcb9 ("ipr: Fix out-of-bounds null overwrite") removed
the end of line handling when storing the update_fw sysfs attribute.
This changed the userspace API because it started refusing writes
terminated by a line feed, which broke the update tools we already have.

This patch re-adds that handling, so both a write terminated by a line
feed or not can make it through with the update.

Fixes: d63c7dd5bcb9 ("ipr: Fix out-of-bounds null overwrite")
Signed-off-by: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Cc: Insu Yun <wuninsu@gmail.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ipr.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 1c3759bab80b..43ac62623bf2 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4003,6 +4003,7 @@ static ssize_t ipr_store_update_fw(struct device *dev,
 	struct ipr_sglist *sglist;
 	char fname[100];
 	char *src;
+	char *endline;
 	int result, dnld_size;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -4010,6 +4011,10 @@ static ssize_t ipr_store_update_fw(struct device *dev,
 
 	snprintf(fname, sizeof(fname), "%s", buf);
 
+	endline = strchr(fname, '\n');
+	if (endline)
+		*endline = '\0';
+
 	if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
 		dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
 		return -EIO;

From 0ae6554c2bbfa89218cf3a6e8d8d10581334f551 Mon Sep 17 00:00:00 2001
From: Matti Gottlieb <matti.gottlieb@intel.com>
Date: Sun, 14 Feb 2016 17:05:39 +0200
Subject: [PATCH 256/797] iwlwifi: mvm: Fix paging memory leak

commit 905e36ae172c83a30894a3adefab7d4f850fcf54 upstream.

If the opmode is stopped and started again we did not free
the paging buffers. Fix that.
In addition when freeing the firmware's paging download
buffer, set the pointer to NULL.

Signed-off-by: Matti Gottlieb <matti.gottlieb@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/mvm/fw.c  | 4 +++-
 drivers/net/wireless/iwlwifi/mvm/mvm.h | 3 +++
 drivers/net/wireless/iwlwifi/mvm/ops.c | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index d906fa13ba97..610c442c7ab2 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c
@@ -106,7 +106,7 @@ static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant)
 				    sizeof(tx_ant_cmd), &tx_ant_cmd);
 }
 
-static void iwl_free_fw_paging(struct iwl_mvm *mvm)
+void iwl_free_fw_paging(struct iwl_mvm *mvm)
 {
 	int i;
 
@@ -126,6 +126,8 @@ static void iwl_free_fw_paging(struct iwl_mvm *mvm)
 			     get_order(mvm->fw_paging_db[i].fw_paging_size));
 	}
 	kfree(mvm->trans->paging_download_buf);
+	mvm->trans->paging_download_buf = NULL;
+
 	memset(mvm->fw_paging_db, 0, sizeof(mvm->fw_paging_db));
 }
 
diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h
index 4bde2d027dcd..244e26c26821 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h
@@ -1190,6 +1190,9 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm,
 void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm,
 					      struct iwl_rx_cmd_buffer *rxb);
 
+/* Paging */
+void iwl_free_fw_paging(struct iwl_mvm *mvm);
+
 /* MVM debugfs */
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir);
diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index 13c97f665ba8..c3adf2bcdc85 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c
@@ -645,6 +645,8 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode)
 	for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++)
 		kfree(mvm->nvm_sections[i].data);
 
+	iwl_free_fw_paging(mvm);
+
 	iwl_mvm_tof_clean(mvm);
 
 	ieee80211_free_hw(mvm->hw);

From 791b5b0d2d01542a87af4b5f8fb2504ce2d5b352 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 2 Mar 2016 11:47:29 -0500
Subject: [PATCH 257/797] drm/radeon: disable runtime pm on PX laptops without
 dGPU power control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit e64c952efb8e0c15ae82cec8e455ab4910690ef1 upstream.

Some PX laptops don't provide an ACPI method to control dGPU power.  On
those systems, the driver is responsible for handling the dGPU power
state.  Disable runtime PM on them until support for this is implemented.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_atpx_handler.c | 8 ++++----
 drivers/gpu/drm/radeon/radeon_device.c       | 8 +++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index c4b4f298a283..9bc408c9f9f6 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -62,6 +62,10 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
+bool radeon_has_atpx_dgpu_power_cntl(void) {
+	return radeon_atpx_priv.atpx.functions.power_cntl;
+}
+
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -141,10 +145,6 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	atpx->functions.power_cntl = true;
-
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c566993a2ec3..f78f111e68de 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -103,6 +103,12 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool radeon_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
 #define RADEON_PX_QUIRK_DISABLE_PX  (1 << 0)
 #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
 
@@ -1433,7 +1439,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 
-	if (rdev->flags & RADEON_IS_PX)
+	if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl())
 		runtime = true;
 	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime);
 	if (runtime)

From a4ecd0324b3cdea3abc4b0f5d7e2829530400317 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Sun, 6 Mar 2016 02:39:53 +0100
Subject: [PATCH 258/797] drm/radeon: Don't drop DP 2.7 Ghz link setup on some
 cards.

commit 459ee1c3fd097ab56ababd8ff4bb7ef6a792de33 upstream.

As observed on Apple iMac10,1, DCE-3.2, RV-730,
link rate of 2.7 Ghz is not selected, because
the args.v1.ucConfig flag setting for 2.7 Ghz
gets overwritten by a following assignment of
the transmitter to use.

Move link rate setup a few lines down to fix this.
In practice this didn't have any positive or
negative effect on display setup on the tested
iMac10,1 so I don't know if backporting to stable
makes sense or not.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/atombios_encoders.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index bb292143997e..adf74f4366bb 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -892,8 +892,6 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m
 			else
 				args.v1.ucLaneNum = 4;
 
-			if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
-				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 				args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
@@ -910,6 +908,10 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
 			else
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
+
+			if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
+				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+
 			break;
 		case 2:
 		case 3:

From 5b5abb9b85e97630e07b2b6d33f4739a2eb4e872 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 2 Mar 2016 12:10:20 -0500
Subject: [PATCH 259/797] drm/amdgpu: disable runtime pm on PX laptops without
 dGPU power control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit bedf2a65c1aa8fb29ba8527fd00c0f68ec1f55f1 upstream.

Some PX laptops don't provide an ACPI method to control dGPU power.  On
those systems, the driver is responsible for handling the dGPU power
state.  Disable runtime PM on them until support for this is implemented.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c       | 8 +++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 5a8fbadbd27b..8ac49812a716 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,6 +63,10 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -142,10 +146,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	atpx->functions.power_cntl = true;
-
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c961fe093e12..9d88023df836 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -61,6 +61,12 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -1469,7 +1475,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	if (amdgpu_runtime_pm == 1)
 		runtime = true;
-	if (amdgpu_device_is_px(ddev))
+	if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
 		runtime = true;
 	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
 	if (runtime)

From 044ebd1d5a691b1638ae622994d02f74f6e43591 Mon Sep 17 00:00:00 2001
From: Ken Wang <Qingqing.Wang@amd.com>
Date: Thu, 17 Mar 2016 17:26:57 +0800
Subject: [PATCH 260/797] drm/amdgpu: include the right version of gmc header
 files for iceland

commit 16a8a49be1b878ef6dd5d1663d456e254e54ae3d upstream.

Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 2cf50180cc51..b1c7a9b3631b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -32,8 +32,8 @@
 #include "oss/oss_2_4_d.h"
 #include "oss/oss_2_4_sh_mask.h"
 
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"

From bcda0fd9a8e18ea72f32542d9a1c4b010f89c5d2 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Thu, 11 Feb 2016 16:30:51 -0500
Subject: [PATCH 261/797] IB/ipoib: fix for rare multicast join race condition

commit 08bc327629cbd63bb2f66677e4b33b643695097c upstream.

A narrow window for a race condition still exists between the
multicast join thread and the *dev_flush workers.
A kernel crash caused by prolonged erratic link state changes
was observed (most likely due to faulty cabling):

[167275.656270] BUG: unable to handle kernel NULL pointer dereference at
0000000000000020
[167275.665973] IP: [<ffffffffa05f8f2e>] ipoib_mcast_join+0xae/0x1d0 [ib_ipoib]
[167275.674443] PGD 0
[167275.677373] Oops: 0000 [#1] SMP
...
[167275.977530] Call Trace:
[167275.982225]  [<ffffffffa05f92f0>] ? ipoib_mcast_free+0x200/0x200 [ib_ipoib]
[167275.992024]  [<ffffffffa05fa1b7>] ipoib_mcast_join_task+0x2a7/0x490
[ib_ipoib]
[167276.002149]  [<ffffffff8109d5fb>] process_one_work+0x17b/0x470
[167276.010754]  [<ffffffff8109e3cb>] worker_thread+0x11b/0x400
[167276.019088]  [<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
[167276.027737]  [<ffffffff810a5aef>] kthread+0xcf/0xe0
Here was a hit spot:
ipoib_mcast_join() {
..............
      rec.qkey      = priv->broadcast->mcmember.qkey;
                                       ^^^^^^^
.....
 }
The proposed patch should prevent the multicast join task from
continuing if a link state change is detected.

Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Changes from v4:
- as suggested by Doug Ledford, optimized spinlock usage,
i.e. ipoib_mcast_join() is called with lock held.
Changes from v3:
- sync with priv->lock before flag check.
Changes from v2:
- Move check for OPER_UP flag state to mcast_join() to
ensure no event worker is in progress.
- minor style fixes.
Changes from v1:
- No need to lock again if error detected.
Signed-off-by: Doug Ledford <dledford@redhat.com>
Cc: Nikolay Borisov <kernel@kyup.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../infiniband/ulp/ipoib/ipoib_multicast.c    | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f357ca67a41c..87799de90a1d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -456,7 +456,10 @@ static int ipoib_mcast_join_complete(int status,
 	return status;
 }
 
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
+/*
+ * Caller must hold 'priv->lock'
+ */
+static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_sa_multicast *multicast;
@@ -466,6 +469,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 	ib_sa_comp_mask comp_mask;
 	int ret = 0;
 
+	if (!priv->broadcast ||
+	    !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+		return -EINVAL;
+
 	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
 
 	rec.mgid     = mcast->mcmember.mgid;
@@ -525,20 +532,23 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 			rec.join_state = 4;
 #endif
 	}
+	spin_unlock_irq(&priv->lock);
 
 	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
+	spin_lock_irq(&priv->lock);
 	if (IS_ERR(multicast)) {
 		ret = PTR_ERR(multicast);
 		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-		spin_lock_irq(&priv->lock);
 		/* Requeue this join task with a backoff delay */
 		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 		spin_unlock_irq(&priv->lock);
 		complete(&mcast->done);
+		spin_lock_irq(&priv->lock);
 	}
+	return 0;
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -620,9 +630,10 @@ void ipoib_mcast_join_task(struct work_struct *work)
 				/* Found the next unjoined group */
 				init_completion(&mcast->done);
 				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-				spin_unlock_irq(&priv->lock);
-				ipoib_mcast_join(dev, mcast);
-				spin_lock_irq(&priv->lock);
+				if (ipoib_mcast_join(dev, mcast)) {
+					spin_unlock_irq(&priv->lock);
+					return;
+				}
 			} else if (!delay_until ||
 				 time_before(mcast->delay_until, delay_until))
 				delay_until = mcast->delay_until;
@@ -641,10 +652,9 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	if (mcast) {
 		init_completion(&mcast->done);
 		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		ipoib_mcast_join(dev, mcast);
 	}
 	spin_unlock_irq(&priv->lock);
-	if (mcast)
-		ipoib_mcast_join(dev, mcast);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)

From aa60f652eeebb8345ed8c543d1a8d6b71f902de3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 18 Mar 2016 12:27:43 -0400
Subject: [PATCH 262/797] tracing: Have preempt(irqs)off trace preempt disabled
 functions

commit cb86e05390debcc084cfdb0a71ed4c5dbbec517d upstream.

Joel Fernandes reported that the function tracing of preempt disabled
sections was not being reported when running either the preemptirqsoff or
preemptoff tracers. This was due to the fact that the function tracer
callback for those tracers checked if irqs were disabled before tracing. But
this fails when we want to trace preempt off locations as well.

Joel explained that he wanted to see functions where interrupts are enabled
but preemption was disabled. The expected output he wanted:

   <...>-2265    1d.h1 3419us : preempt_count_sub <-irq_exit
   <...>-2265    1d..1 3419us : __do_softirq <-irq_exit
   <...>-2265    1d..1 3419us : msecs_to_jiffies <-__do_softirq
   <...>-2265    1d..1 3420us : irqtime_account_irq <-__do_softirq
   <...>-2265    1d..1 3420us : __local_bh_disable_ip <-__do_softirq
   <...>-2265    1..s1 3421us : run_timer_softirq <-__do_softirq
   <...>-2265    1..s1 3421us : hrtimer_run_pending <-run_timer_softirq
   <...>-2265    1..s1 3421us : _raw_spin_lock_irq <-run_timer_softirq
   <...>-2265    1d.s1 3422us : preempt_count_add <-_raw_spin_lock_irq
   <...>-2265    1d.s2 3422us : _raw_spin_unlock_irq <-run_timer_softirq
   <...>-2265    1..s2 3422us : preempt_count_sub <-_raw_spin_unlock_irq
   <...>-2265    1..s1 3423us : rcu_bh_qs <-__do_softirq
   <...>-2265    1d.s1 3423us : irqtime_account_irq <-__do_softirq
   <...>-2265    1d.s1 3423us : __local_bh_enable <-__do_softirq

There's a comment saying that the irq disabled check is because there's a
possible race that tracing_cpu may be set when the function is executed. But
I don't remember that race. For now, I added a check for preemption being
enabled too to not record the function, as there would be no race if that
was the case. I need to re-investigate this, as I'm now thinking that the
tracing_cpu will always be correct. But no harm in keeping the check for
now, except for the slight performance hit.

Link: http://lkml.kernel.org/r/1457770386-88717-1-git-send-email-agnel.joel@gmail.com

Fixes: 5e6d2b9cfa3a "tracing: Use one prologue for the preempt irqs off tracer function tracers"
Reported-by: Joel Fernandes <agnel.joel@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_irqsoff.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index e4e56589ec1d..be3222b7d72e 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -109,8 +109,12 @@ static int func_prolog_dec(struct trace_array *tr,
 		return 0;
 
 	local_save_flags(*flags);
-	/* slight chance to get a false positive on tracing_cpu */
-	if (!irqs_disabled_flags(*flags))
+	/*
+	 * Slight chance to get a false positive on tracing_cpu,
+	 * although I'm starting to think there isn't a chance.
+	 * Leave this for now just to be paranoid.
+	 */
+	if (!irqs_disabled_flags(*flags) && !preempt_count())
 		return 0;
 
 	*data = per_cpu_ptr(tr->trace_buffer.data, cpu);

From aab3ba82f8445abe99f33e743e7316d0d55fee7b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 18 Mar 2016 15:46:48 -0400
Subject: [PATCH 263/797] tracing: Fix crash from reading trace_pipe with
 sendfile

commit a29054d9478d0435ab01b7544da4f674ab13f533 upstream.

If tracing contains data and the trace_pipe file is read with sendfile(),
then it can trigger a NULL pointer dereference and various BUG_ON within the
VM code.

There's a patch to fix this in the splice_to_pipe() code, but it's also a
good idea to not let that happen from trace_pipe either.

Link: http://lkml.kernel.org/r/1457641146-9068-1-git-send-email-rabin@rab.in

Reported-by: Rabin Vincent <rabin.vincent@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d9293402ee68..8305cbb2d5a2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4949,7 +4949,10 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
 	spd.nr_pages = i;
 
-	ret = splice_to_pipe(pipe, &spd);
+	if (i)
+		ret = splice_to_pipe(pipe, &spd);
+	else
+		ret = 0;
 out:
 	splice_shrink_spd(&spd);
 	return ret;

From 3dba3f672dfd0d5d961a294dac5bee18759cda6a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 22 Mar 2016 17:30:58 -0400
Subject: [PATCH 264/797] tracing: Fix trace_printk() to print when not using
 bprintk()

commit 3debb0a9ddb16526de8b456491b7db60114f7b5e upstream.

The trace_printk() code will allocate extra buffers if the compile detects
that a trace_printk() is used. To do this, the format of the trace_printk()
is saved to the __trace_printk_fmt section, and if that section is bigger
than zero, the buffers are allocated (along with a message that this has
happened).

If trace_printk() uses a format that is not a constant, and thus something
not guaranteed to be around when the print happens, the compiler optimizes
the fmt out, as it is not used, and the __trace_printk_fmt section is not
filled. This means the kernel will not allocate the special buffers needed
for the trace_printk() and the trace_printk() will not write anything to the
tracing buffer.

Adding a "__used" to the variable in the __trace_printk_fmt section will
keep it around, even though it is set to NULL. This will keep the string
from being printed in the debugfs/tracing/printk_formats section as it is
not needed.

Reported-by: Vlastimil Babka <vbabka@suse.cz>
Fixes: 07d777fe8c398 "tracing: Add percpu buffers for trace_printk()"
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernel.h      | 6 +++---
 kernel/trace/trace_printk.c | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 350dfb08aee3..924853d33a13 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -607,7 +607,7 @@ do {							\
 
 #define do_trace_printk(fmt, args...)					\
 do {									\
-	static const char *trace_printk_fmt				\
+	static const char *trace_printk_fmt __used			\
 		__attribute__((section("__trace_printk_fmt"))) =	\
 		__builtin_constant_p(fmt) ? fmt : NULL;			\
 									\
@@ -651,7 +651,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
  */
 
 #define trace_puts(str) ({						\
-	static const char *trace_printk_fmt				\
+	static const char *trace_printk_fmt __used			\
 		__attribute__((section("__trace_printk_fmt"))) =	\
 		__builtin_constant_p(str) ? str : NULL;			\
 									\
@@ -673,7 +673,7 @@ extern void trace_dump_stack(int skip);
 #define ftrace_vprintk(fmt, vargs)					\
 do {									\
 	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
+		static const char *trace_printk_fmt __used		\
 		  __attribute__((section("__trace_printk_fmt"))) =	\
 			__builtin_constant_p(fmt) ? fmt : NULL;		\
 									\
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 060df67dbdd1..f96f0383f6c6 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -296,6 +296,9 @@ static int t_show(struct seq_file *m, void *v)
 	const char *str = *fmt;
 	int i;
 
+	if (!*fmt)
+		return 0;
+
 	seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
 
 	/*

From 2bfb8435386c44f89753b0417303110ff596dba9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 9 Mar 2016 12:40:54 +0100
Subject: [PATCH 265/797] bitops: Do not default to __clear_bit() for
 __clear_bit_unlock()

commit f75d48644c56a31731d17fa693c8175328957e1d upstream.

__clear_bit_unlock() is a special little snowflake. While it carries the
non-atomic '__' prefix, it is specifically documented to pair with
test_and_set_bit() and therefore should be 'somewhat' atomic.

Therefore the generic implementation of __clear_bit_unlock() cannot use
the fully non-atomic __clear_bit() as a default.

If an arch is able to do better, it must provide an implementation of
__clear_bit_unlock() itself.

Specifically, this came up as a result of hackbench livelock'ing in
slab_lock() on ARC with SMP + SLUB + !LLSC.

The issue was incorrect pairing of atomic ops.

 slab_lock() -> bit_spin_lock() -> test_and_set_bit()
 slab_unlock() -> __bit_spin_unlock() -> __clear_bit()

The non serializing __clear_bit() was getting "lost"

 80543b8e:	ld_s       r2,[r13,0] <--- (A) Finds PG_locked is set
 80543b90:	or         r3,r2,1    <--- (B) other core unlocks right here
 80543b94:	st_s       r3,[r13,0] <--- (C) sets PG_locked (overwrites unlock)

Fixes ARC STAR 9000817404 (and probably more).

Reported-by: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Tested-by: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Noam Camus <noamc@ezchip.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160309114054.GJ6356@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/bitops/lock.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index c30266e94806..8ef0ccbf8167 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -29,16 +29,16 @@ do {					\
  * @nr: the bit to set
  * @addr: the address to start counting from
  *
- * This operation is like clear_bit_unlock, however it is not atomic.
- * It does provide release barrier semantics so it can be used to unlock
- * a bit lock, however it would only be used if no other CPU can modify
- * any bits in the memory until the lock is released (a good example is
- * if the bit lock itself protects access to the other bits in the word).
+ * A weaker form of clear_bit_unlock() as used by __bit_lock_unlock(). If all
+ * the bits in the word are protected by this lock some archs can use weaker
+ * ops to safely unlock.
+ *
+ * See for example x86's implementation.
  */
 #define __clear_bit_unlock(nr, addr)	\
 do {					\
-	smp_mb();			\
-	__clear_bit(nr, addr);		\
+	smp_mb__before_atomic();	\
+	clear_bit(nr, addr);		\
 } while (0)
 
 #endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */

From 0a10eaa1a91a738523ce2f9e55cd9f6f655e31f1 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Thu, 18 Feb 2016 00:16:14 +0100
Subject: [PATCH 266/797] scripts/coccinelle: modernize &

commit 1b669e713f277a4d4b3cec84e13d16544ac8286d upstream.

& is no longer allowed in column 0, since Coccinelle 1.0.4.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Tested-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/coccinelle/iterators/use_after_iter.cocci | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/coccinelle/iterators/use_after_iter.cocci b/scripts/coccinelle/iterators/use_after_iter.cocci
index f085f5968c52..ce8cc9c006e5 100644
--- a/scripts/coccinelle/iterators/use_after_iter.cocci
+++ b/scripts/coccinelle/iterators/use_after_iter.cocci
@@ -123,7 +123,7 @@ list_remove_head(x,c,...)
 |
 sizeof(<+...c...+>)
 |
-&c->member
+ &c->member
 |
 c = E
 |

From a42c9e4f1b3751743dd58773d45d2eb3ee4ca5ac Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 25 Jan 2016 09:45:47 -0700
Subject: [PATCH 267/797] scripts/kconfig: allow building with make 3.80 again

commit 42f9d3c6888bceef6dc7ba72c77acf47347dcf05 upstream.

Documentation/Changes still lists this as the minimal required version,
so it ought to remain usable for the time being.

Fixes: d2036f30cf ("scripts/kconfig/Makefile: Allow KBUILD_DEFCONFIG to be a target")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index d79cba4ce3eb..ebced77deb9c 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -96,13 +96,15 @@ savedefconfig: $(obj)/conf
 defconfig: $(obj)/conf
 ifeq ($(KBUILD_DEFCONFIG),)
 	$< $(silent) --defconfig $(Kconfig)
-else ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),)
+else
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),)
 	@$(kecho) "*** Default configuration is based on '$(KBUILD_DEFCONFIG)'"
 	$(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) $(Kconfig)
 else
 	@$(kecho) "*** Default configuration is based on target '$(KBUILD_DEFCONFIG)'"
 	$(Q)$(MAKE) -f $(srctree)/Makefile $(KBUILD_DEFCONFIG)
 endif
+endif
 
 %_defconfig: $(obj)/conf
 	$(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig)

From f24fe0da244641bc4a19cff1cb7c7cb3dd50b88a Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 26 Feb 2016 16:15:17 +0100
Subject: [PATCH 268/797] kbuild/mkspec: fix grub2 installkernel issue

commit c8b08ca558c0067bc9e15ce3f1e70af260410bb2 upstream.

mkspec is copying the built kernel to a temporary location

	/boot/vmlinuz-$KERNELRELEASE-rpm

and runs installkernel on it. This however directly leads to grub2
menuentry for this suffixed binary being generated as well during the run
of installkernel script.

Later in the process the temporary -rpm suffixed files are removed, and
therefore we end up with spurious (and non-functional) grub2 menu entries
for each installed kernel RPM.

Fix that by using a different temporary name (prefixed by '.'), so that
the binary is not recognized as an actual kernel binary and no menuentry
is created for it.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Fixes: 3c9c7a14b627 ("rpm-pkg: add %post section to create initramfs and grub hooks")
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/package/mkspec | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 71004daefe31..fe44d68e9344 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -131,11 +131,11 @@ echo 'rm -rf $RPM_BUILD_ROOT'
 echo ""
 echo "%post"
 echo "if [ -x /sbin/installkernel -a -r /boot/vmlinuz-$KERNELRELEASE -a -r /boot/System.map-$KERNELRELEASE ]; then"
-echo "cp /boot/vmlinuz-$KERNELRELEASE /boot/vmlinuz-$KERNELRELEASE-rpm"
-echo "cp /boot/System.map-$KERNELRELEASE /boot/System.map-$KERNELRELEASE-rpm"
+echo "cp /boot/vmlinuz-$KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm"
+echo "cp /boot/System.map-$KERNELRELEASE /boot/.System.map-$KERNELRELEASE-rpm"
 echo "rm -f /boot/vmlinuz-$KERNELRELEASE /boot/System.map-$KERNELRELEASE"
-echo "/sbin/installkernel $KERNELRELEASE /boot/vmlinuz-$KERNELRELEASE-rpm /boot/System.map-$KERNELRELEASE-rpm"
-echo "rm -f /boot/vmlinuz-$KERNELRELEASE-rpm /boot/System.map-$KERNELRELEASE-rpm"
+echo "/sbin/installkernel $KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm"
+echo "rm -f /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm"
 echo "fi"
 echo ""
 echo "%files"

From 507fafe0e83fcc011bcb23af37179b7d412624b3 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 22 Mar 2016 15:11:03 -0700
Subject: [PATCH 269/797] MAINTAINERS: Update mailing list and web page for
 hwmon subsystem

commit 968ce1b1f45a7d76b5471b19bd035dbecc72f32d upstream.

The old web page for the hwmon subsystem is no longer operational,
and the mailing list has become unreliable. Move both to kernel.org.

Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 96 ++++++++++++++++++++++++++---------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d826f1b9eb02..4c3e1d2ac31b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -230,13 +230,13 @@ F:	kernel/sys_ni.c
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/abituguru.c
 
 ABIT UGURU 3 HARDWARE MONITOR DRIVER
 M:	Alistair John Strachan <alistair@devzero.co.uk>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/abituguru3.c
 
@@ -373,14 +373,14 @@ S:	Maintained
 
 ADM1025 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/adm1025
 F:	drivers/hwmon/adm1025.c
 
 ADM1029 HARDWARE MONITOR DRIVER
 M:	Corentin Labbe <clabbe.montjoie@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/adm1029.c
 
@@ -425,7 +425,7 @@ F:	drivers/video/backlight/adp8860_bl.c
 
 ADS1015 HARDWARE MONITOR DRIVER
 M:	Dirk Eibach <eibach@gdsys.de>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ads1015
 F:	drivers/hwmon/ads1015.c
@@ -438,7 +438,7 @@ F:	drivers/macintosh/therm_adt746x.c
 
 ADT7475 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/adt7475
 F:	drivers/hwmon/adt7475.c
@@ -615,7 +615,7 @@ F:	include/linux/ccp.h
 
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
 M:	Andreas Herrmann <herrmann.der.user@googlemail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/fam15h_power
 F:	drivers/hwmon/fam15h_power.c
@@ -779,7 +779,7 @@ F:	drivers/input/mouse/bcm5974.c
 
 APPLE SMC DRIVER
 M:	Henrik Rydberg <rydberg@bitmath.org>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Odd fixes
 F:	drivers/hwmon/applesmc.c
 
@@ -1777,7 +1777,7 @@ F:	include/media/as3645a.h
 
 ASC7621 HARDWARE MONITOR DRIVER
 M:	George Joseph <george.joseph@fairview5.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/asc7621
 F:	drivers/hwmon/asc7621.c
@@ -1864,7 +1864,7 @@ F:	drivers/net/wireless/ath/carl9170/
 
 ATK0110 HWMON DRIVER
 M:	Luca Tettamanti <kronos.it@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/asus_atk0110.c
 
@@ -2984,7 +2984,7 @@ F:	mm/swap_cgroup.c
 
 CORETEMP HARDWARE MONITORING DRIVER
 M:	Fenghua Yu <fenghua.yu@intel.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/coretemp
 F:	drivers/hwmon/coretemp.c
@@ -3549,7 +3549,7 @@ T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/dme1737
 F:	drivers/hwmon/dme1737.c
@@ -4262,7 +4262,7 @@ F:	include/video/exynos_mipi*
 
 F71805F HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/f71805f
 F:	drivers/hwmon/f71805f.c
@@ -4341,7 +4341,7 @@ F:	fs/*
 
 FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 M:	Riku Voipio <riku.voipio@iki.fi>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/f75375s.c
 F:	include/linux/f75375s.h
@@ -4883,8 +4883,8 @@ F:	drivers/media/usb/hackrf/
 HARDWARE MONITORING
 M:	Jean Delvare <jdelvare@suse.com>
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
-W:	http://www.lm-sensors.org/
+L:	linux-hwmon@vger.kernel.org
+W:	http://hwmon.wiki.kernel.org/
 T:	quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
@@ -5393,7 +5393,7 @@ F:	drivers/usb/atm/ueagle-atm.c
 
 INA209 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ina209
 F:	Documentation/devicetree/bindings/i2c/ina209.txt
@@ -5401,7 +5401,7 @@ F:	drivers/hwmon/ina209.c
 
 INA2XX HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ina2xx
 F:	drivers/hwmon/ina2xx.c
@@ -5884,7 +5884,7 @@ F:	drivers/isdn/hardware/eicon/
 
 IT87 HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/it87
 F:	drivers/hwmon/it87.c
@@ -5920,7 +5920,7 @@ F:	drivers/media/dvb-frontends/ix2505v*
 
 JC42.4 TEMPERATURE SENSOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/jc42.c
 F:	Documentation/hwmon/jc42
@@ -5970,14 +5970,14 @@ F:	drivers/tty/serial/jsm/
 
 K10TEMP HARDWARE MONITORING DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/k10temp
 F:	drivers/hwmon/k10temp.c
 
 K8TEMP HARDWARE MONITORING DRIVER
 M:	Rudolf Marek <r.marek@assembler.cz>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/k8temp
 F:	drivers/hwmon/k8temp.c
@@ -6485,27 +6485,27 @@ F:	net/llc/
 
 LM73 HARDWARE MONITOR DRIVER
 M:	Guillaume Ligneul <guillaume.ligneul@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/lm73.c
 
 LM78 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm78
 F:	drivers/hwmon/lm78.c
 
 LM83 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm83
 F:	drivers/hwmon/lm83.c
 
 LM90 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm90
 F:	Documentation/devicetree/bindings/hwmon/lm90.txt
@@ -6513,7 +6513,7 @@ F:	drivers/hwmon/lm90.c
 
 LM95234 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm95234
 F:	drivers/hwmon/lm95234.c
@@ -6580,7 +6580,7 @@ F:	drivers/scsi/sym53c8xx_2/
 
 LTC4261 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ltc4261
 F:	drivers/hwmon/ltc4261.c
@@ -6749,28 +6749,28 @@ F:	include/uapi/linux/matroxfb.h
 
 MAX16065 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max16065
 F:	drivers/hwmon/max16065.c
 
 MAX20751 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max20751
 F:	drivers/hwmon/max20751.c
 
 MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 M:	"Hans J. Koch" <hjk@hansjkoch.de>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max6650
 F:	drivers/hwmon/max6650.c
 
 MAX6697 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max6697
 F:	Documentation/devicetree/bindings/i2c/max6697.txt
@@ -7303,7 +7303,7 @@ F:	drivers/scsi/NCR_D700.*
 
 NCT6775 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/nct6775
 F:	drivers/hwmon/nct6775.c
@@ -8064,7 +8064,7 @@ F:	drivers/video/logo/logo_parisc*
 
 PC87360 HARDWARE MONITORING DRIVER
 M:	Jim Cromie <jim.cromie@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/pc87360
 F:	drivers/hwmon/pc87360.c
@@ -8076,7 +8076,7 @@ F:	drivers/char/pc8736x_gpio.c
 
 PC87427 HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/pc87427
 F:	drivers/hwmon/pc87427.c
@@ -8415,8 +8415,8 @@ F:	drivers/rtc/rtc-puv3.c
 
 PMBUS HARDWARE MONITORING DRIVERS
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
-W:	http://www.lm-sensors.org/
+L:	linux-hwmon@vger.kernel.org
+W:	http://hwmon.wiki.kernel.org/
 W:	http://www.roeck-us.net/linux/drivers/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
@@ -8610,7 +8610,7 @@ F:	drivers/media/usb/pwc/*
 
 PWM FAN DRIVER
 M:	Kamil Debski <k.debski@samsung.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/hwmon/pwm-fan.txt
 F:	Documentation/hwmon/pwm-fan
@@ -9882,28 +9882,28 @@ F:	Documentation/devicetree/bindings/media/i2c/nokia,smia.txt
 
 SMM665 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/smm665
 F:	drivers/hwmon/smm665.c
 
 SMSC EMC2103 HARDWARE MONITOR DRIVER
 M:	Steve Glendinning <steve.glendinning@shawell.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/emc2103
 F:	drivers/hwmon/emc2103.c
 
 SMSC SCH5627 HARDWARE MONITOR DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Supported
 F:	Documentation/hwmon/sch5627
 F:	drivers/hwmon/sch5627.c
 
 SMSC47B397 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/smsc47b397
 F:	drivers/hwmon/smsc47b397.c
@@ -10830,7 +10830,7 @@ F:	include/linux/mmc/sh_mobile_sdhi.h
 
 TMP401 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/tmp401
 F:	drivers/hwmon/tmp401.c
@@ -11564,14 +11564,14 @@ F:	Documentation/networking/vrf.txt
 
 VT1211 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/vt1211
 F:	drivers/hwmon/vt1211.c
 
 VT8231 HARDWARE MONITOR DRIVER
 M:	Roger Lucas <vt8231@hiddenengine.co.uk>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/vt8231.c
 
@@ -11590,21 +11590,21 @@ F:	drivers/w1/
 
 W83791D HARDWARE MONITORING DRIVER
 M:	Marc Hulsman <m.hulsman@tudelft.nl>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/w83791d
 F:	drivers/hwmon/w83791d.c
 
 W83793 HARDWARE MONITORING DRIVER
 M:	Rudolf Marek <r.marek@assembler.cz>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/w83793
 F:	drivers/hwmon/w83793.c
 
 W83795 HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/w83795.c
 

From 65b55d179b275cec668b2fffa23f884b55575726 Mon Sep 17 00:00:00 2001
From: John Dahlstrom <jodarom@SDF.ORG>
Date: Sat, 27 Feb 2016 00:09:58 -0600
Subject: [PATCH 270/797] ideapad-laptop: Add ideapad Y700 (15) to the
 no_hw_rfkill DMI list

commit 4db9675d927a71faa66e5ab128d2390d6329750b upstream.

Some Lenovo ideapad models lack a physical rfkill switch.
On Lenovo models ideapad Y700 Touch-15ISK and ideapad Y700-15ISK,
ideapad-laptop would wrongly report all radios as blocked by
hardware which caused wireless network connections to fail.

Add these models without an rfkill switch to the no_hw_rfkill list.

Signed-off-by: John Dahlstrom <jodarom@sdf.org>
Cc: <stable@vger.kernel.org> # 3.17.x-: 4fa9dab: ideapad_laptop: Lenovo G50-30 fix rfkill reports wireless blocked
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/ideapad-laptop.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index d78ee151c9e4..be3bc2f4edd4 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -864,6 +864,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"),
 		},
 	},
+	{
+		.ident = "Lenovo ideapad Y700-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-15ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad Y700 Touch-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700 Touch-15ISK"),
+		},
+	},
 	{
 		.ident = "Lenovo ideapad Y700-17ISK",
 		.matches = {

From d69bc6d28c9662bde7230d88e405c8694d05d94c Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 16 Mar 2016 18:15:47 +0800
Subject: [PATCH 271/797] mmc: block: fix ABI regression of mmc_blk_ioctl

commit 83c742c344c08c2bbe338d45c6ec63110e9d5e3d upstream.

If mmc_blk_ioctl returns -EINVAL, blkdev_ioctl continues to
work without returning err to user-space. But now we check
CAP_SYS_RAWIO first, so we return -EPERM to blkdev_ioctl,
which makes blkdev_ioctl return -EPERM to user-space directly.
So this will break all the ioctl with BLKROSET. Now we find
Android-adb suffers from it, as shown in the following log:

remount of /system failed;
couldn't make block device writable: Operation not permitted
openat(AT_FDCWD, "/dev/block/platform/ff420000.dwmmc/by-name/system", O_RDONLY) = 3
ioctl(3, BLKROSET, 0)  = -1 EPERM (Operation not permitted)

Fixes: a5f5774c55a2 ("mmc: block: Add new ioctl to send multi commands")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/card/block.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index d8486168415a..553113eb1bdb 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -596,6 +596,14 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev,
 	struct mmc_card *card;
 	int err = 0, ioc_err = 0;
 
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
 	idata = mmc_blk_ioctl_copy_from_user(ic_ptr);
 	if (IS_ERR(idata))
 		return PTR_ERR(idata);
@@ -638,6 +646,14 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 	int i, err = 0, ioc_err = 0;
 	__u64 num_of_cmds;
 
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
 	if (copy_from_user(&num_of_cmds, &user->num_of_cmds,
 			   sizeof(num_of_cmds)))
 		return -EFAULT;
@@ -693,14 +709,6 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned int cmd, unsigned long arg)
 {
-	/*
-	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
-	 * whole block device, not on a partition.  This prevents overspray
-	 * between sibling partitions.
-	 */
-	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
-		return -EPERM;
-
 	switch (cmd) {
 	case MMC_IOC_CMD:
 		return mmc_blk_ioctl_cmd(bdev,

From 0a060276f0f429bb0402e7fa6581d472934300fb Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Tue, 16 Feb 2016 13:06:41 +0900
Subject: [PATCH 272/797] mmc: mmc_spi: Add Card Detect comments and fix CD
 GPIO case

commit bcdc9f260bdce09913db1464be9817170d51044a upstream.

This patch stops the MMC SPI driver from polling for card detect when a
CD GPIO that supports interrupts is specified using the gpios DT property.

Without this patch the DT node below results in the following output:

 spi_gpio: spi-gpio { /* SD2 @ CN12 */
         compatible = "spi-gpio";
         #address-cells = <1>;
         #size-cells = <0>;
         gpio-sck = <&gpio6 16 GPIO_ACTIVE_HIGH>;
         gpio-mosi = <&gpio6 17 GPIO_ACTIVE_HIGH>;
         gpio-miso = <&gpio6 18 GPIO_ACTIVE_HIGH>;
         num-chipselects = <1>;
         cs-gpios = <&gpio6 21 GPIO_ACTIVE_LOW>;
         status = "okay";

         spi@0 {
                 compatible = "mmc-spi-slot";
                 reg = <0>;
                 voltage-ranges = <3200 3400>;
                 spi-max-frequency = <25000000>;
                 gpios = <&gpio6 22 GPIO_ACTIVE_LOW>;   /* CD */
         };
 };

 # dmesg | grep mmc
 mmc_spi spi32766.0: SD/MMC host mmc0, no WP, no poweroff, cd polling
 mmc0: host does not support reading read-only switch, assuming write-enable
 mmc0: new SDHC card on SPI
 mmcblk0: mmc0:0000 SU04G 3.69 GiB
 mmcblk0: p1

With this patch applied the "cd polling" portion above disappears.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/mmc_spi.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 1c1b45ef3faf..aad3243a48fc 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1436,6 +1436,12 @@ static int mmc_spi_probe(struct spi_device *spi)
 					     host->pdata->cd_debounce);
 		if (status != 0)
 			goto fail_add_host;
+
+		/* The platform has a CD GPIO signal that may support
+		 * interrupts, so let mmc_gpiod_request_cd_irq() decide
+		 * if polling is needed or not.
+		 */
+		mmc->caps &= ~MMC_CAP_NEEDS_POLL;
 		mmc_gpiod_request_cd_irq(mmc);
 	}
 

From cd3c71fbb75025f89f64bdd87123d7c247b69a96 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 26 Jan 2016 13:40:58 +0000
Subject: [PATCH 273/797] mmc: sdhci: fix data timeout (part 1)

commit fafcfda9e78cae8796d1799f14e6457790797555 upstream.

The data timeout gives the minimum amount of time that should be
waited before timing out if no data is received from the card.
Simply dividing the nanosecond part by 1000 does not give this
required guarantee, since such a division rounds down.  Use
DIV_ROUND_UP() to give the desired timeout.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 8814eb6b83bf..14e118fc0097 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -666,7 +666,7 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 	if (!data)
 		target_timeout = cmd->busy_timeout * 1000;
 	else {
-		target_timeout = data->timeout_ns / 1000;
+		target_timeout = DIV_ROUND_UP(data->timeout_ns, 1000);
 		if (host->clock)
 			target_timeout += data->timeout_clks / host->clock;
 	}

From e113935b8fca965d685084c347864ffb18e32f62 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 26 Jan 2016 13:41:04 +0000
Subject: [PATCH 274/797] mmc: sdhci: fix data timeout (part 2)

commit 7f05538af71c7d30b5fc821cbe9f318edc645961 upstream.

The calculation for the timeout based on the number of card clocks is
incorrect.  The calculation assumed:

	timeout in microseconds = clock cycles / clock in Hz

which is clearly several orders of magnitude wrong.  Fix this by
multiplying the clock cycles by 1000000 prior to dividing by the Hz
based clock.  Also, as per part 1, ensure that the division rounds
up.

As this needs 64-bit math via do_div(), avoid it if the number of
clock cycles is zero.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 14e118fc0097..c993d392b470 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -667,8 +667,19 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 		target_timeout = cmd->busy_timeout * 1000;
 	else {
 		target_timeout = DIV_ROUND_UP(data->timeout_ns, 1000);
-		if (host->clock)
-			target_timeout += data->timeout_clks / host->clock;
+		if (host->clock && data->timeout_clks) {
+			unsigned long long val;
+
+			/*
+			 * data->timeout_clks is in units of clock cycles.
+			 * host->clock is in Hz.  target_timeout is in us.
+			 * Hence, us = 1000000 * cycles / Hz.  Round up.
+			 */
+			val = 1000000 * data->timeout_clks;
+			if (do_div(val, host->clock))
+				target_timeout++;
+			target_timeout += val;
+		}
 	}
 
 	/*

From 86beab2febf17b3384f9a1cd333ae7c06a31d0a0 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 7 Mar 2016 13:33:55 +0200
Subject: [PATCH 275/797] mmc: sdhci: Fix override of timeout clk wrt
 max_busy_timeout

commit 995136247915c5cee633d55ba23f6eebf67aa567 upstream.

Normally the timeout clock frequency is read from the capabilities
register.  It is also possible to set the value prior to calling
sdhci_add_host() in which case that value will override the
capabilities register value.  However that was being done after
calculating max_busy_timeout so that max_busy_timeout was being
calculated using the wrong value of timeout_clk.

Fix that by moving the override before max_busy_timeout is
calculated.

The result is that the max_busy_timeout and max_discard
increase for BSW devices so that, for example, the time for
mkfs.ext4 on a 64GB eMMC drops from about 1 minute 40 seconds
to about 20 seconds.

Note, in the future, the capabilities setting will be tidied up
and this override won't be used anymore.  However this fix is
needed for stable.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index c993d392b470..1a802af827ed 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -3106,14 +3106,14 @@ int sdhci_add_host(struct sdhci_host *host)
 		if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT)
 			host->timeout_clk *= 1000;
 
+		if (override_timeout_clk)
+			host->timeout_clk = override_timeout_clk;
+
 		mmc->max_busy_timeout = host->ops->get_max_timeout_count ?
 			host->ops->get_max_timeout_count(host) : 1 << 27;
 		mmc->max_busy_timeout /= host->timeout_clk;
 	}
 
-	if (override_timeout_clk)
-		host->timeout_clk = override_timeout_clk;
-
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
 	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 

From 451e4ff10d95a1390c49afcce5d5c339f97b0048 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Tue, 19 Jan 2016 10:01:08 +0100
Subject: [PATCH 276/797] clk: rockchip: rk3368: fix cpuclk mux bit of big
 cpu-cluster

commit 535ebd428aeb07c3327947281306f2943f2c9faa upstream.

Both clusters have their mux bit in bit 7 of their respective register.
For whatever reason the big cluster currently lists bit 15 which is
definitely wrong.

Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller")
Reported-by: Zhang Qing <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: zhangqing <zhangqing@rock-chips.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-rk3368.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 7e6b783e6eee..c3b2da08e015 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -165,7 +165,7 @@ static const struct rockchip_cpuclk_reg_data rk3368_cpuclkb_data = {
 	.core_reg = RK3368_CLKSEL_CON(0),
 	.div_core_shift = 0,
 	.div_core_mask = 0x1f,
-	.mux_core_shift = 15,
+	.mux_core_shift = 7,
 };
 
 static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = {

From 20c736ddb2819e071dcefa087b5d33e19b462e4e Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Tue, 19 Jan 2016 10:09:22 +0100
Subject: [PATCH 277/797] clk: rockchip: rk3368: fix cpuclk core dividers

commit c6d5fe2ca8286f35a79f7345c9378c39d48a1527 upstream.

Similar to commit 9880d4277f6a ("clk: rockchip: fix rk3288 cpuclk core
dividers") it seems the cpuclk dividers are one too high on the rk3368
as well.

And again similar to the previous fix, we opt to make the divider list
contain the values to be written to use the same paradigm for them on all
supported socs.

Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller")
Reported-by: Zhang Qing <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: zhangqing <zhangqing@rock-chips.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-rk3368.c | 40 +++++++++++++++----------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index c3b2da08e015..4e9907272b12 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -218,29 +218,29 @@ static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = {
 	}
 
 static struct rockchip_cpuclk_rate_table rk3368_cpuclkb_rates[] __initdata = {
-	RK3368_CPUCLKB_RATE(1512000000, 2, 6, 6),
-	RK3368_CPUCLKB_RATE(1488000000, 2, 5, 5),
-	RK3368_CPUCLKB_RATE(1416000000, 2, 5, 5),
-	RK3368_CPUCLKB_RATE(1200000000, 2, 4, 4),
-	RK3368_CPUCLKB_RATE(1008000000, 2, 4, 4),
-	RK3368_CPUCLKB_RATE( 816000000, 2, 3, 3),
-	RK3368_CPUCLKB_RATE( 696000000, 2, 3, 3),
-	RK3368_CPUCLKB_RATE( 600000000, 2, 2, 2),
-	RK3368_CPUCLKB_RATE( 408000000, 2, 2, 2),
-	RK3368_CPUCLKB_RATE( 312000000, 2, 2, 2),
+	RK3368_CPUCLKB_RATE(1512000000, 1, 5, 5),
+	RK3368_CPUCLKB_RATE(1488000000, 1, 4, 4),
+	RK3368_CPUCLKB_RATE(1416000000, 1, 4, 4),
+	RK3368_CPUCLKB_RATE(1200000000, 1, 3, 3),
+	RK3368_CPUCLKB_RATE(1008000000, 1, 3, 3),
+	RK3368_CPUCLKB_RATE( 816000000, 1, 2, 2),
+	RK3368_CPUCLKB_RATE( 696000000, 1, 2, 2),
+	RK3368_CPUCLKB_RATE( 600000000, 1, 1, 1),
+	RK3368_CPUCLKB_RATE( 408000000, 1, 1, 1),
+	RK3368_CPUCLKB_RATE( 312000000, 1, 1, 1),
 };
 
 static struct rockchip_cpuclk_rate_table rk3368_cpuclkl_rates[] __initdata = {
-	RK3368_CPUCLKL_RATE(1512000000, 2, 7, 7),
-	RK3368_CPUCLKL_RATE(1488000000, 2, 6, 6),
-	RK3368_CPUCLKL_RATE(1416000000, 2, 6, 6),
-	RK3368_CPUCLKL_RATE(1200000000, 2, 5, 5),
-	RK3368_CPUCLKL_RATE(1008000000, 2, 5, 5),
-	RK3368_CPUCLKL_RATE( 816000000, 2, 4, 4),
-	RK3368_CPUCLKL_RATE( 696000000, 2, 3, 3),
-	RK3368_CPUCLKL_RATE( 600000000, 2, 3, 3),
-	RK3368_CPUCLKL_RATE( 408000000, 2, 2, 2),
-	RK3368_CPUCLKL_RATE( 312000000, 2, 2, 2),
+	RK3368_CPUCLKL_RATE(1512000000, 1, 6, 6),
+	RK3368_CPUCLKL_RATE(1488000000, 1, 5, 5),
+	RK3368_CPUCLKL_RATE(1416000000, 1, 5, 5),
+	RK3368_CPUCLKL_RATE(1200000000, 1, 4, 4),
+	RK3368_CPUCLKL_RATE(1008000000, 1, 4, 4),
+	RK3368_CPUCLKL_RATE( 816000000, 1, 3, 3),
+	RK3368_CPUCLKL_RATE( 696000000, 1, 2, 2),
+	RK3368_CPUCLKL_RATE( 600000000, 1, 2, 2),
+	RK3368_CPUCLKL_RATE( 408000000, 1, 1, 1),
+	RK3368_CPUCLKL_RATE( 312000000, 1, 1, 1),
 };
 
 static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {

From c7e33d74fbd86e518c6f98b9c6f35badc1d3719d Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 20 Jan 2016 19:22:38 +0100
Subject: [PATCH 278/797] clk: rockchip: rk3368: fix parents of video
 encoder/decoder

commit 0f28d98463498c61c61a38aacbf9f69e92e85e9d upstream.

The vdpu and vepu clocks can also be parented to the npll, and the
current parent list is also wrong as it would use the npll as the
"usbphy" source, so adapt the parent list to the correct one.

Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller")
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: zhangqing <zhangqing@rock-chips.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-rk3368.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 4e9907272b12..aa419373d35a 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -384,10 +384,10 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	 * Clock-Architecture Diagram 3
 	 */
 
-	COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_usb_p, 0,
+	COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_npll_usb_p, 0,
 			RK3368_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3368_CLKGATE_CON(4), 6, GFLAGS),
-	COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb_p, 0,
+	COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_npll_usb_p, 0,
 			RK3368_CLKSEL_CON(15), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK3368_CLKGATE_CON(4), 7, GFLAGS),
 

From afb1f06758abb5cfa01a54298b57e0cce3bf3273 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 20 Jan 2016 21:47:57 +0100
Subject: [PATCH 279/797] clk: rockchip: rk3368: fix hdmi_cec gate-register

commit fd0c0740fac17a014704ef89d8c8b1768711ca59 upstream.

Fix a typo making the sclk_hdmi_cec access a wrong register to handle
its gate.

Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller")
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: zhangqing <zhangqing@rock-chips.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-rk3368.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index aa419373d35a..1b148694b633 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -442,7 +442,7 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0,
 			RK3368_CLKGATE_CON(4), 13, GFLAGS),
 	GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0,
-			RK3368_CLKGATE_CON(5), 12, GFLAGS),
+			RK3368_CLKGATE_CON(4), 12, GFLAGS),
 
 	COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(21), 15, 1, MFLAGS,

From 5f9403e710e03098b06c321aee6b31621efca5b1 Mon Sep 17 00:00:00 2001
From: Alexander Kochetkov <al.kochet@gmail.com>
Date: Tue, 26 Jan 2016 16:34:00 +0300
Subject: [PATCH 280/797] clk: rockchip: add hclk_cpubus to the list of rk3188
 critical clocks

commit e8b63288b37dbb8457b510c9d96f6006da4653f6 upstream.

hclk_cpubus needs to keep running because it is needed for devices like
the rom, i2s0 or spdif to be accessible via cpu. Without that all
accesses to devices (readl/writel) return wrong data. So add it
to the list of critical clocks.

Fixes: 78eaf6095cc763c ("clk: rockchip: disable unused clocks")
Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk-rk3188.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c
index abb47608713b..fe728f8dcbe4 100644
--- a/drivers/clk/rockchip/clk-rk3188.c
+++ b/drivers/clk/rockchip/clk-rk3188.c
@@ -718,6 +718,7 @@ static const char *const rk3188_critical_clocks[] __initconst = {
 	"hclk_peri",
 	"pclk_cpu",
 	"pclk_peri",
+	"hclk_cpubus"
 };
 
 static void __init rk3188_common_clk_init(struct device_node *np)

From b3822a1078c87a6f74270741bc4cc660e4f11bae Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Feb 2016 19:03:57 -0800
Subject: [PATCH 281/797] clk: bcm2835: Fix setting of PLL divider clock rates

commit 773b3966dd3cdaeb68e7f2edfe5656abac1dc411 upstream.

Our dividers weren't being set successfully because CM_PASSWORD wasn't
included in the register write.  It looks easier to just compute the
divider to write ourselves than to update clk-divider for the ability
to OR in some arbitrary bits on write.

Fixes about half of the video modes on my HDMI monitor (everything
except 720x400).

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/bcm/clk-bcm2835.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 39bf5820297e..4f9830c1b121 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -1097,13 +1097,15 @@ static int bcm2835_pll_divider_set_rate(struct clk_hw *hw,
 	struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw);
 	struct bcm2835_cprman *cprman = divider->cprman;
 	const struct bcm2835_pll_divider_data *data = divider->data;
-	u32 cm;
-	int ret;
+	u32 cm, div, max_div = 1 << A2W_PLL_DIV_BITS;
 
-	ret = clk_divider_ops.set_rate(hw, rate, parent_rate);
-	if (ret)
-		return ret;
+	div = DIV_ROUND_UP_ULL(parent_rate, rate);
 
+	div = min(div, max_div);
+	if (div == max_div)
+		div = 0;
+
+	cprman_write(cprman, data->a2w_reg, div);
 	cm = cprman_read(cprman, data->cm_reg);
 	cprman_write(cprman, data->cm_reg, cm | data->load_mask);
 	cprman_write(cprman, data->cm_reg, cm & ~data->load_mask);

From 9b3e8070857db21b9d3973387cd9aae5944d71ad Mon Sep 17 00:00:00 2001
From: Himanshu Madhani <himanshu.madhani@qlogic.com>
Date: Mon, 14 Mar 2016 22:47:37 -0700
Subject: [PATCH 282/797] target: Fix target_release_cmd_kref shutdown comp
 leak

commit 5e47f1985d7107331c3f64fb3ec83d66fd73577e upstream.

This patch fixes an active I/O shutdown bug for fabric
drivers using target_wait_for_sess_cmds(), where se_cmd
descriptor shutdown would result in hung tasks waiting
indefinitely for se_cmd->cmd_wait_comp to complete().

To address this bug, drop the incorrect list_del_init()
usage in target_wait_for_sess_cmds() and always complete()
during se_cmd target_release_cmd_kref() put, in order to
let caller invoke the final fabric release callback
into se_cmd->se_tfo->release_cmd() code.

Reported-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
Tested-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/target/target_core_transport.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 94f4ffac723f..d151bc3d6971 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2618,8 +2618,6 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
 
 	list_for_each_entry_safe(se_cmd, tmp_cmd,
 				&se_sess->sess_wait_list, se_cmd_list) {
-		list_del_init(&se_cmd->se_cmd_list);
-
 		pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
 			" %d\n", se_cmd, se_cmd->t_state,
 			se_cmd->se_tfo->get_cmd_state(se_cmd));

From a91eb042e1bacdc61a2ca5cb184aa99c24ed5cdb Mon Sep 17 00:00:00 2001
From: Jenny Derzhavetz <jennyf@mellanox.com>
Date: Wed, 24 Feb 2016 19:23:58 +0200
Subject: [PATCH 283/797] iser-target: Fix identification of login rx
 descriptor type

commit b89a7c25462b164db280abc3b05d4d9d888d40e9 upstream.

Once connection request is accepted, one rx descriptor
is posted to receive login request. This descriptor has rx type,
but is outside the main pool of rx descriptors, and thus
was mistreated as tx type.

Signed-off-by: Jenny Derzhavetz <jennyf@mellanox.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 8a51c3b5d657..addb57265cb2 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2035,7 +2035,8 @@ is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
 	void *start = isert_conn->rx_descs;
 	int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->rx_descs);
 
-	if (wr_id >= start && wr_id < start + len)
+	if ((wr_id >= start && wr_id < start + len) ||
+	    (wr_id == isert_conn->login_req_buf))
 		return false;
 
 	return true;

From b0d31bbebb1f293ef196c4c84e682b58b0e9cf7b Mon Sep 17 00:00:00 2001
From: Jenny Derzhavetz <jennyf@mellanox.com>
Date: Wed, 24 Feb 2016 19:23:59 +0200
Subject: [PATCH 284/797] iser-target: Add new state ISER_CONN_BOUND to
 isert_conn

commit aea92980601f7ddfcb3c54caa53a43726314fe46 upstream.

We need an indication that isert_conn->iscsi_conn binding has
happened so we'll know not to invoke a connection reinstatement
on an unbound connection which will lead to a bogus isert_conn->conn
dereference.

Signed-off-by: Jenny Derzhavetz <jennyf@mellanox.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 7 +++++--
 drivers/infiniband/ulp/isert/ib_isert.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index addb57265cb2..0919d6add4e5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -820,7 +820,7 @@ isert_put_conn(struct isert_conn *isert_conn)
  * @isert_conn: isert connection struct
  *
  * Notes:
- * In case the connection state is FULL_FEATURE, move state
+ * In case the connection state is BOUND, move state
  * to TEMINATING and start teardown sequence (rdma_disconnect).
  * In case the connection state is UP, complete flush as well.
  *
@@ -836,6 +836,7 @@ isert_conn_terminate(struct isert_conn *isert_conn)
 	case ISER_CONN_TERMINATING:
 		break;
 	case ISER_CONN_UP:
+	case ISER_CONN_BOUND:
 	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
 		isert_info("Terminating conn %p state %d\n",
 			   isert_conn, isert_conn->state);
@@ -2062,7 +2063,8 @@ isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
 			isert_completion_put(desc, isert_cmd, ib_dev, true);
 	} else {
 		isert_conn->post_recv_buf_count--;
-		if (!isert_conn->post_recv_buf_count)
+		if (!isert_conn->post_recv_buf_count &&
+		    isert_conn->state >= ISER_CONN_BOUND)
 			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
 	}
 }
@@ -3194,6 +3196,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 
 	conn->context = isert_conn;
 	isert_conn->conn = conn;
+	isert_conn->state = ISER_CONN_BOUND;
 
 	isert_set_conn_info(np, conn, isert_conn);
 
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 3d7fbc47c343..d9635203be63 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -50,6 +50,7 @@ enum iser_ib_op_code {
 enum iser_conn_state {
 	ISER_CONN_INIT,
 	ISER_CONN_UP,
+	ISER_CONN_BOUND,
 	ISER_CONN_FULL_FEATURE,
 	ISER_CONN_TERMINATING,
 	ISER_CONN_DOWN,

From 60f0f01da74b14a0b27becf30a70155a8db23445 Mon Sep 17 00:00:00 2001
From: Jenny Derzhavetz <jennyf@mellanox.com>
Date: Wed, 24 Feb 2016 19:24:00 +0200
Subject: [PATCH 285/797] iser-target: Separate flows for np listeners and
 connections cma events

commit f81bf458208ef6d12b2fc08091204e3859dcdba4 upstream.

No need to restrict this check to specific events.

Signed-off-by: Jenny Derzhavetz <jennyf@mellanox.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 0919d6add4e5..48e2394cd8c7 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -884,14 +884,9 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
 			   enum rdma_cm_event_type event)
 {
 	struct isert_np *isert_np = cma_id->context;
-	struct isert_conn *isert_conn;
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
 	bool terminating = false;
 
-	if (isert_np->cm_id == cma_id)
-		return isert_np_cma_handler(cma_id->context, event);
-
-	isert_conn = cma_id->qp->qp_context;
-
 	mutex_lock(&isert_conn->mutex);
 	terminating = (isert_conn->state == ISER_CONN_TERMINATING);
 	isert_conn_terminate(isert_conn);
@@ -930,12 +925,16 @@ isert_connect_error(struct rdma_cm_id *cma_id)
 static int
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
+	struct isert_np *isert_np = cma_id->context;
 	int ret = 0;
 
 	isert_info("%s (%d): status %d id %p np %p\n",
 		   rdma_event_msg(event->event), event->event,
 		   event->status, cma_id, cma_id->context);
 
+	if (isert_np->cm_id == cma_id)
+		return isert_np_cma_handler(cma_id->context, event->event);
+
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		ret = isert_connect_request(cma_id, event);

From 48f447bcebd889aab7193659841de7962bf52a56 Mon Sep 17 00:00:00 2001
From: Jenny Derzhavetz <jennyf@mellanox.com>
Date: Wed, 24 Feb 2016 19:24:01 +0200
Subject: [PATCH 286/797] iser-target: Rework connection termination

commit 6d1fba0c2cc7efe42fd761ecbba833ed0ea7b07e upstream.

When we receive an event that triggers connection termination,
we have a couple of things we may want to do:
1. In case we are already terminating, bailout early
2. In case we are connected but not bound, disconnect and schedule
   a connection cleanup silently (don't reinstate)
3. In case we are connected and bound, disconnect and reinstate the connection

This rework fixes a bug that was detected against a mis-behaved
initiator which rejected our rdma_cm accept; at this stage the
isert_conn is not bound and reinstate caused a bogus dereference.

What's great about this is that we don't need the
post_recv_buf_count anymore, so get rid of it.

Signed-off-by: Jenny Derzhavetz <jennyf@mellanox.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 107 ++++++++++++------------
 drivers/infiniband/ulp/isert/ib_isert.h |   1 -
 2 files changed, 52 insertions(+), 56 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 48e2394cd8c7..b0edb66a291b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -66,6 +66,7 @@ isert_rdma_accept(struct isert_conn *isert_conn);
 struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
 
 static void isert_release_work(struct work_struct *work);
+static void isert_wait4flush(struct isert_conn *isert_conn);
 
 static inline bool
 isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
@@ -815,6 +816,25 @@ isert_put_conn(struct isert_conn *isert_conn)
 	kref_put(&isert_conn->kref, isert_release_kref);
 }
 
+static void
+isert_handle_unbound_conn(struct isert_conn *isert_conn)
+{
+	struct isert_np *isert_np = isert_conn->cm_id->context;
+
+	mutex_lock(&isert_np->mutex);
+	if (!list_empty(&isert_conn->node)) {
+		/*
+		 * This means iscsi doesn't know this connection
+		 * so schedule a cleanup ourselves
+		 */
+		list_del_init(&isert_conn->node);
+		isert_put_conn(isert_conn);
+		complete(&isert_conn->wait);
+		queue_work(isert_release_wq, &isert_conn->release_work);
+	}
+	mutex_unlock(&isert_np->mutex);
+}
+
 /**
  * isert_conn_terminate() - Initiate connection termination
  * @isert_conn: isert connection struct
@@ -832,24 +852,19 @@ isert_conn_terminate(struct isert_conn *isert_conn)
 {
 	int err;
 
-	switch (isert_conn->state) {
-	case ISER_CONN_TERMINATING:
-		break;
-	case ISER_CONN_UP:
-	case ISER_CONN_BOUND:
-	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
-		isert_info("Terminating conn %p state %d\n",
-			   isert_conn, isert_conn->state);
-		isert_conn->state = ISER_CONN_TERMINATING;
-		err = rdma_disconnect(isert_conn->cm_id);
-		if (err)
-			isert_warn("Failed rdma_disconnect isert_conn %p\n",
-				   isert_conn);
-		break;
-	default:
-		isert_warn("conn %p teminating in state %d\n",
-			   isert_conn, isert_conn->state);
-	}
+	if (isert_conn->state >= ISER_CONN_TERMINATING)
+		return;
+
+	isert_info("Terminating conn %p state %d\n",
+		   isert_conn, isert_conn->state);
+	isert_conn->state = ISER_CONN_TERMINATING;
+	err = rdma_disconnect(isert_conn->cm_id);
+	if (err)
+		isert_warn("Failed rdma_disconnect isert_conn %p\n",
+			   isert_conn);
+
+	isert_info("conn %p completing wait\n", isert_conn);
+	complete(&isert_conn->wait);
 }
 
 static int
@@ -883,30 +898,27 @@ static int
 isert_disconnected_handler(struct rdma_cm_id *cma_id,
 			   enum rdma_cm_event_type event)
 {
-	struct isert_np *isert_np = cma_id->context;
 	struct isert_conn *isert_conn = cma_id->qp->qp_context;
-	bool terminating = false;
 
 	mutex_lock(&isert_conn->mutex);
-	terminating = (isert_conn->state == ISER_CONN_TERMINATING);
-	isert_conn_terminate(isert_conn);
+	switch (isert_conn->state) {
+	case ISER_CONN_TERMINATING:
+		break;
+	case ISER_CONN_UP:
+		isert_conn_terminate(isert_conn);
+		isert_wait4flush(isert_conn);
+		isert_handle_unbound_conn(isert_conn);
+		break;
+	case ISER_CONN_BOUND:
+	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
+		iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+		break;
+	default:
+		isert_warn("conn %p teminating in state %d\n",
+			   isert_conn, isert_conn->state);
+	}
 	mutex_unlock(&isert_conn->mutex);
 
-	isert_info("conn %p completing wait\n", isert_conn);
-	complete(&isert_conn->wait);
-
-	if (terminating)
-		goto out;
-
-	mutex_lock(&isert_np->mutex);
-	if (!list_empty(&isert_conn->node)) {
-		list_del_init(&isert_conn->node);
-		isert_put_conn(isert_conn);
-		queue_work(isert_release_wq, &isert_conn->release_work);
-	}
-	mutex_unlock(&isert_np->mutex);
-
-out:
 	return 0;
 }
 
@@ -980,13 +992,10 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 	rx_wr--;
 	rx_wr->next = NULL; /* mark end of work requests list */
 
-	isert_conn->post_recv_buf_count += count;
 	ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
 			   &rx_wr_failed);
-	if (ret) {
+	if (ret)
 		isert_err("ib_post_recv() failed with ret: %d\n", ret);
-		isert_conn->post_recv_buf_count -= count;
-	}
 
 	return ret;
 }
@@ -1002,12 +1011,9 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
 	rx_wr.num_sge = 1;
 	rx_wr.next = NULL;
 
-	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed);
-	if (ret) {
+	if (ret)
 		isert_err("ib_post_recv() failed with ret: %d\n", ret);
-		isert_conn->post_recv_buf_count--;
-	}
 
 	return ret;
 }
@@ -1120,12 +1126,9 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 
-	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail);
-	if (ret) {
+	if (ret)
 		isert_err("ib_post_recv() failed: %d\n", ret);
-		isert_conn->post_recv_buf_count--;
-	}
 
 	return ret;
 }
@@ -1620,7 +1623,6 @@ isert_rcv_completion(struct iser_rx_desc *desc,
 	ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen,
 				      DMA_FROM_DEVICE);
 
-	isert_conn->post_recv_buf_count--;
 }
 
 static int
@@ -2060,11 +2062,6 @@ isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
 			isert_unmap_tx_desc(desc, ib_dev);
 		else
 			isert_completion_put(desc, isert_cmd, ib_dev, true);
-	} else {
-		isert_conn->post_recv_buf_count--;
-		if (!isert_conn->post_recv_buf_count &&
-		    isert_conn->state >= ISER_CONN_BOUND)
-			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
 	}
 }
 
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index d9635203be63..1874d21daee0 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -145,7 +145,6 @@ struct isert_device;
 
 struct isert_conn {
 	enum iser_conn_state	state;
-	int			post_recv_buf_count;
 	u32			responder_resources;
 	u32			initiator_depth;
 	bool			pi_support;

From 9ef1ecc409b1e1113bf5e4b6bdd47137e5f9cebb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 29 Feb 2016 20:21:21 -0500
Subject: [PATCH 287/797] nfsd4: fix bad bounds checking

commit 4aed9c46afb80164401143aa0fdcfe3798baa9d5 upstream.

A number of spots in the xdr decoding follow a pattern like

	n = be32_to_cpup(p++);
	READ_BUF(n + 4);

where n is a u32.  The only bounds checking is done in READ_BUF itself,
but since it's checking (n + 4), it won't catch cases where n is very
large, (u32)(-4) or higher.  I'm not sure exactly what the consequences
are, but we've seen crashes soon after.

Instead, just break these up into two READ_BUF()s.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4xdr.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..12935209deca 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1072,8 +1072,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
 
 	READ_BUF(4);
 	rename->rn_snamelen = be32_to_cpup(p++);
-	READ_BUF(rename->rn_snamelen + 4);
+	READ_BUF(rename->rn_snamelen);
 	SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+	READ_BUF(4);
 	rename->rn_tnamelen = be32_to_cpup(p++);
 	READ_BUF(rename->rn_tnamelen);
 	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
@@ -1155,13 +1156,14 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
 	READ_BUF(8);
 	setclientid->se_callback_prog = be32_to_cpup(p++);
 	setclientid->se_callback_netid_len = be32_to_cpup(p++);
-
-	READ_BUF(setclientid->se_callback_netid_len + 4);
+	READ_BUF(setclientid->se_callback_netid_len);
 	SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+	READ_BUF(4);
 	setclientid->se_callback_addr_len = be32_to_cpup(p++);
 
-	READ_BUF(setclientid->se_callback_addr_len + 4);
+	READ_BUF(setclientid->se_callback_addr_len);
 	SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+	READ_BUF(4);
 	setclientid->se_callback_ident = be32_to_cpup(p++);
 
 	DECODE_TAIL;
@@ -1815,8 +1817,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 
 	READ_BUF(4);
 	argp->taglen = be32_to_cpup(p++);
-	READ_BUF(argp->taglen + 8);
+	READ_BUF(argp->taglen);
 	SAVEMEM(argp->tag, argp->taglen);
+	READ_BUF(8);
 	argp->minorversion = be32_to_cpup(p++);
 	argp->opcnt = be32_to_cpup(p++);
 	max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);

From 56fb92d684cc51baf3a421a3ac19af441a52f413 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 2 Mar 2016 16:36:21 -0800
Subject: [PATCH 288/797] nfsd: fix deadlock secinfo+readdir compound

commit 2f6fc056e899bd0144a08da5cacaecbe8997cd74 upstream.

nfsd_lookup_dentry exits with the parent filehandle locked.  fh_put also
unlocks if necessary (nfsd filehandle locking is probably too lenient),
so it gets unlocked eventually, but if the following op in the compound
needs to lock it again, we can deadlock.

A fuzzer ran into this; normal clients don't send a secinfo followed by
a readdir in the same compound.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a9f096c7e99f..7d5351cd67fb 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -877,6 +877,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 				    &exp, &dentry);
 	if (err)
 		return err;
+	fh_unlock(&cstate->current_fh);
 	if (d_really_is_negative(dentry)) {
 		exp_put(exp);
 		err = nfserr_noent;

From d287698c43d7915e422d298985891eb609b511b8 Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@atmel.com>
Date: Fri, 11 Mar 2016 11:43:39 +0100
Subject: [PATCH 289/797] ARM: dts: at91: sama5d3 Xplained: don't disable hsmci
 regulator

commit ae3fc8ea08e405682f1fa959f94b6e4126afbc1b upstream.

If enabling the hsmci regulator on card detection, the board can reboot
on sd card insertion. Keeping the regulator always enabled fixes this
issue.

Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Fixes: 1b53e3416dd0 ("ARM: at91/dt: sama5d3 xplained: add fixed regulator for vmmc0")
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/at91-sama5d3_xplained.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
index ff888d21c786..f3e2b96c06a3 100644
--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
@@ -303,6 +303,7 @@ vcc_mmc0_reg: fixedregulator@0 {
 		regulator-name = "mmc0-card-supply";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
 	};
 
 	gpio_keys {

From fe81b4d996fbf36cf4e84869b6c5f4394f5e5af9 Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@atmel.com>
Date: Fri, 11 Mar 2016 11:35:10 +0100
Subject: [PATCH 290/797] ARM: dts: at91: sama5d4 Xplained: don't disable hsmci
 regulator

commit b02acd4e62602a6ab307da84388a16bf60106c48 upstream.

If enabling the hsmci regulator on card detection, the board can reboot
on sd card insertion. Keeping the regulator always enabled fixes this
issue.

Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Fixes: 8d545f32bd77 ("ARM: at91/dt: sama5d4 xplained: add regulators for v(q)mmc1 supplies")
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/at91-sama5d4_xplained.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index 569026e8f96c..da84e65b56ef 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
@@ -268,5 +268,6 @@ vcc_mmc1_reg: fixedregulator@1 {
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
 		vin-supply = <&vcc_3v3_reg>;
+		regulator-always-on;
 	};
 };

From b05e5a587ddc11255de76657e6b4b0e960783cc3 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Wed, 23 Mar 2016 00:11:20 +0100
Subject: [PATCH 291/797] ACPI / PM: Runtime resume devices when waking from
 hibernate

commit fbda4b38fa3995aa0777fe9cbbdcb223c6292083 upstream.

Commit 58a1fbbb2ee8 ("PM / PCI / ACPI: Kick devices that might have been
reset by firmware") added a runtime resume for devices that were runtime
suspended when the system entered suspend-to-RAM.

Briefly, the motivation was to ensure that devices did not remain in a
reset-power-on state after resume, potentially preventing deep SoC-wide
low-power states from being entered on idle.

Currently we're not doing the same when leaving suspend-to-disk and this
asymmetry is a problem if drivers rely on the automatic resume triggered
by pm_complete_with_resume_check(). Fix it.

Fixes: 58a1fbbb2ee8 (PM / PCI / ACPI: Kick devices that might have been reset by firmware)
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/sleep.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 0d94621dc856..e3322adaaae0 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -714,6 +714,7 @@ static int acpi_hibernation_enter(void)
 
 static void acpi_hibernation_leave(void)
 {
+	pm_set_resume_via_firmware();
 	/*
 	 * If ACPI is not enabled by the BIOS and the boot kernel, we need to
 	 * enable it here.

From d78ddcfbe7ab8c5f4ff0b8f20b2bbda710fc0e91 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 18 Mar 2016 13:50:03 -0400
Subject: [PATCH 292/797] writeback, cgroup: fix premature wb_put() in
 locked_inode_to_wb_and_lock_list()

commit 614a4e3773148a31f58dc174bbf578ceb63510c2 upstream.

locked_inode_to_wb_and_lock_list() wb_get()'s the wb associated with
the target inode, unlocks inode, locks the wb's list_lock and verifies
that the inode is still associated with the wb.  To prevent the wb
going away between dropping inode lock and acquiring list_lock, the wb
is pinned while inode lock is held.  The wb reference is put right
after acquiring list_lock citing that the wb won't be dereferenced
anymore.

This isn't true.  If the inode is still associated with the wb, the
inode has reference and it's safe to return the wb; however, if inode
has been switched, the wb still needs to be unlocked which is a
dereference and can lead to use-after-free if it races with wb
destruction.

Fix it by putting the reference after releasing list_lock.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 87e1d789bf55 ("writeback: implement [locked_]inode_to_wb_and_lock_list()")
Tested-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fs-writeback.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7a8ea1351584..e84b698e1d6c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode)
 		wb_get(wb);
 		spin_unlock(&inode->i_lock);
 		spin_lock(&wb->list_lock);
-		wb_put(wb);		/* not gonna deref it anymore */
 
 		/* i_wb may have changed inbetween, can't use inode_to_wb() */
-		if (likely(wb == inode->i_wb))
-			return wb;	/* @inode already has ref */
+		if (likely(wb == inode->i_wb)) {
+			wb_put(wb);	/* @inode already has ref */
+			return wb;
+		}
 
 		spin_unlock(&wb->list_lock);
+		wb_put(wb);
 		cpu_relax();
 		spin_lock(&inode->i_lock);
 	}

From 842ec116c7070c8bfc785b609acb19fb29a59cb0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 18 Mar 2016 13:52:04 -0400
Subject: [PATCH 293/797] writeback, cgroup: fix use of the wrong bdi_writeback
 which mismatches the inode

commit aaf2559332ba272671bb870464a99b909b29a3a1 upstream.

When cgroup writeback is in use, there can be multiple wb's
(bdi_writeback's) per bdi and an inode may switch among them
dynamically.  In a couple places, the wrong wb was used leading to
performing operations on the wrong list under the wrong lock
corrupting the io lists.

* writeback_single_inode() was taking @wb parameter and used it to
  remove the inode from io lists if it becomes clean after writeback.
  The callers of this function were always passing in the root wb
  regardless of the actual wb that the inode was associated with,
  which could also change while writeback is in progress.

  Fix it by dropping the @wb parameter and using
  inode_to_wb_and_lock_list() to determine and lock the associated wb.

* After writeback_sb_inodes() writes out an inode, it re-locks @wb and
  inode to remove it from or move it to the right io list.  It assumes
  that the inode is still associated with @wb; however, the inode may
  have switched to another wb while writeback was in progress.

  Fix it by using inode_to_wb_and_lock_list() to determine and lock
  the associated wb after writeback is complete.  As the function
  requires the original @wb->list_lock locked for the next iteration,
  in the unlikely case where the inode has changed association, switch
  the locks.

Kudos to Tahsin for pinpointing these subtle breakages.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: d10c80955265 ("writeback: implement foreign cgroup inode bdi_writeback switching")
Link: http://lkml.kernel.org/g/CAAeU0aMYeM_39Y2+PaRvyB1nqAPYZSNngJ1eBRmrxn7gKAt2Mg@mail.gmail.com
Reported-and-diagnosed-by: Tahsin Erdogan <tahsin@google.com>
Tested-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fs-writeback.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e84b698e1d6c..60d6fc2e0e4b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1341,10 +1341,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
  * and does more profound writeback list handling in writeback_sb_inodes().
  */
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+static int writeback_single_inode(struct inode *inode,
+				  struct writeback_control *wbc)
 {
+	struct bdi_writeback *wb;
 	int ret = 0;
 
 	spin_lock(&inode->i_lock);
@@ -1382,7 +1382,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	ret = __writeback_single_inode(inode, wbc);
 
 	wbc_detach_inode(wbc);
-	spin_lock(&wb->list_lock);
+
+	wb = inode_to_wb_and_lock_list(inode);
 	spin_lock(&inode->i_lock);
 	/*
 	 * If inode is clean, remove it from writeback lists. Otherwise don't
@@ -1457,6 +1458,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
+		struct bdi_writeback *tmp_wb;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
@@ -1547,15 +1549,23 @@ static long writeback_sb_inodes(struct super_block *sb,
 			cond_resched();
 		}
 
-
-		spin_lock(&wb->list_lock);
+		/*
+		 * Requeue @inode if still dirty.  Be careful as @inode may
+		 * have been switched to another wb in the meantime.
+		 */
+		tmp_wb = inode_to_wb_and_lock_list(inode);
 		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY_ALL))
 			wrote++;
-		requeue_inode(inode, wb, &wbc);
+		requeue_inode(inode, tmp_wb, &wbc);
 		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
 
+		if (unlikely(tmp_wb != wb)) {
+			spin_unlock(&tmp_wb->list_lock);
+			spin_lock(&wb->list_lock);
+		}
+
 		/*
 		 * bail out to wb_writeback() often enough to check
 		 * background threshold and other termination conditions.
@@ -2342,7 +2352,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
  */
 int write_inode_now(struct inode *inode, int sync)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2354,7 +2363,7 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	return writeback_single_inode(inode, wb, &wbc);
+	return writeback_single_inode(inode, &wbc);
 }
 EXPORT_SYMBOL(write_inode_now);
 
@@ -2371,7 +2380,7 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
+	return writeback_single_inode(inode, wbc);
 }
 EXPORT_SYMBOL(sync_inode);
 

From ed12031e713305ef7836d5672ae2ab4e1446ce08 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 17 Mar 2016 17:12:54 -0700
Subject: [PATCH 294/797] Input: synaptics - handle spurious release of
 trackstick buttons, again

commit 82be788c96ed5978d3cb4a00079e26b981a3df3f upstream.

Looks like the firmware 8.2 still has the extra buttons spurious release
bug.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=114321
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/synaptics.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 6025eb430c0a..a41d8328c064 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -862,8 +862,9 @@ static void synaptics_report_ext_buttons(struct psmouse *psmouse,
 	if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap))
 		return;
 
-	/* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */
-	if (SYN_ID_FULL(priv->identity) == 0x801 &&
+	/* Bug in FW 8.1 & 8.2, buttons are reported only when ExtBit is 1 */
+	if ((SYN_ID_FULL(priv->identity) == 0x801 ||
+	     SYN_ID_FULL(priv->identity) == 0x802) &&
 	    !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02))
 		return;
 

From af18c4ca4b1728e2149844656bbf1aa8d7382682 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 17 Mar 2016 14:00:17 -0700
Subject: [PATCH 295/797] Input: ims-pcu - sanity check against missing
 interfaces

commit a0ad220c96692eda76b2e3fd7279f3dcd1d8a8ff upstream.

A malicious device with a missing interface can make the driver oops.
Add sanity checking.

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/misc/ims-pcu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index ac1fa5f44580..9c0ea36913b4 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1663,6 +1663,8 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc
 
 	pcu->ctrl_intf = usb_ifnum_to_if(pcu->udev,
 					 union_desc->bMasterInterface0);
+	if (!pcu->ctrl_intf)
+		return -EINVAL;
 
 	alt = pcu->ctrl_intf->cur_altsetting;
 	pcu->ep_ctrl = &alt->endpoint[0].desc;
@@ -1670,6 +1672,8 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc
 
 	pcu->data_intf = usb_ifnum_to_if(pcu->udev,
 					 union_desc->bSlaveInterface0);
+	if (!pcu->data_intf)
+		return -EINVAL;
 
 	alt = pcu->data_intf->cur_altsetting;
 	if (alt->desc.bNumEndpoints != 2) {

From a1d0a23831ccde9dbd5279a5d45790a96f18ad32 Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Wed, 23 Mar 2016 11:53:46 -0700
Subject: [PATCH 296/797] Input: ati_remote2 - fix crashes on detecting device
 with invalid descriptor

commit 950336ba3e4a1ffd2ca60d29f6ef386dd2c7351d upstream.

The ati_remote2 driver expects at least two interfaces with one
endpoint each. If given a malicious descriptor that specifies only one
interface or no endpoints, it will crash in the probe function.
Ensure there are at least two interfaces and one endpoint for each
interface before using them.

The full disclosure: http://seclists.org/bugtraq/2016/Mar/90

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/misc/ati_remote2.c | 36 ++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index cfd58e87da26..1c5914cae853 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -817,26 +817,49 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d
 
 	ar2->udev = udev;
 
+	/* Sanity check, first interface must have an endpoint */
+	if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
+		dev_err(&interface->dev,
+			"%s(): interface 0 must have an endpoint\n", __func__);
+		r = -ENODEV;
+		goto fail1;
+	}
 	ar2->intf[0] = interface;
 	ar2->ep[0] = &alt->endpoint[0].desc;
 
+	/* Sanity check, the device must have two interfaces */
 	ar2->intf[1] = usb_ifnum_to_if(udev, 1);
+	if ((udev->actconfig->desc.bNumInterfaces < 2) || !ar2->intf[1]) {
+		dev_err(&interface->dev, "%s(): need 2 interfaces, found %d\n",
+			__func__, udev->actconfig->desc.bNumInterfaces);
+		r = -ENODEV;
+		goto fail1;
+	}
+
 	r = usb_driver_claim_interface(&ati_remote2_driver, ar2->intf[1], ar2);
 	if (r)
 		goto fail1;
+
+	/* Sanity check, second interface must have an endpoint */
 	alt = ar2->intf[1]->cur_altsetting;
+	if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
+		dev_err(&interface->dev,
+			"%s(): interface 1 must have an endpoint\n", __func__);
+		r = -ENODEV;
+		goto fail2;
+	}
 	ar2->ep[1] = &alt->endpoint[0].desc;
 
 	r = ati_remote2_urb_init(ar2);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	ar2->channel_mask = channel_mask;
 	ar2->mode_mask = mode_mask;
 
 	r = ati_remote2_setup(ar2, ar2->channel_mask);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	usb_make_path(udev, ar2->phys, sizeof(ar2->phys));
 	strlcat(ar2->phys, "/input0", sizeof(ar2->phys));
@@ -845,11 +868,11 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d
 
 	r = sysfs_create_group(&udev->dev.kobj, &ati_remote2_attr_group);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	r = ati_remote2_input_init(ar2);
 	if (r)
-		goto fail3;
+		goto fail4;
 
 	usb_set_intfdata(interface, ar2);
 
@@ -857,10 +880,11 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d
 
 	return 0;
 
- fail3:
+ fail4:
 	sysfs_remove_group(&udev->dev.kobj, &ati_remote2_attr_group);
- fail2:
+ fail3:
 	ati_remote2_urb_cleanup(ar2);
+ fail2:
 	usb_driver_release_interface(&ati_remote2_driver, ar2->intf[1]);
  fail1:
 	kfree(ar2);

From e4b0e673428391a5ce5827d594d9809ad57fba9e Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@huawei.com>
Date: Fri, 25 Mar 2016 14:21:26 -0700
Subject: [PATCH 297/797] ocfs2/dlm: fix race between convert and recovery

commit ac7cf246dfdbec3d8fed296c7bf30e16f5099dac upstream.

There is a race window between dlmconvert_remote and
dlm_move_lockres_to_recovery_list, which can leave a lock with
OCFS2_LOCK_BUSY set on the grant list, causing the system to hang.

dlmconvert_remote
{
        spin_lock(&res->spinlock);
        list_move_tail(&lock->list, &res->converting);
        lock->convert_pending = 1;
        spin_unlock(&res->spinlock);

        status = dlm_send_remote_convert_request();
        >>>>>> race window: the master has queued the ast and returned
               DLM_NORMAL, but goes down before sending the ast.
               This node detects that the master is down and calls
               dlm_move_lockres_to_recovery_list, which reverts the
               lock to the grant list.
               OCFS2_LOCK_BUSY is then never cleared, because the new
               master won't send the ast any more: it believes the
               convert has already been granted.

        spin_lock(&res->spinlock);
        lock->convert_pending = 0;
        if (status != DLM_NORMAL)
                dlm_revert_pending_convert(res, lock);
        spin_unlock(&res->spinlock);
}

In this case, check whether res->state has the DLM_LOCK_RES_RECOVERING
bit set (res is still recovering) or the res master has changed (the new
master has finished recovery).  If so, reset the status to DLM_RECOVERING
so that the convert is retried.

Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Tariq Saeed <tariq.x.saeed@oracle.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlm/dlmconvert.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index e36d63ff1783..84de55ed865a 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -262,6 +262,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 				  struct dlm_lock *lock, int flags, int type)
 {
 	enum dlm_status status;
+	u8 old_owner = res->owner;
 
 	mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
 	     lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -316,11 +317,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 	spin_lock(&res->spinlock);
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 	lock->convert_pending = 0;
-	/* if it failed, move it back to granted queue */
+	/* if it failed, move it back to granted queue.
+	 * if master returns DLM_NORMAL and then down before sending ast,
+	 * it may have already been moved to granted queue, reset to
+	 * DLM_RECOVERING and retry convert */
 	if (status != DLM_NORMAL) {
 		if (status != DLM_NOTQUEUED)
 			dlm_error(status);
 		dlm_revert_pending_convert(res, lock);
+	} else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
+			(old_owner != res->owner)) {
+		mlog(0, "res %.*s is in recovering or has been recovered.\n",
+				res->lockname.len, res->lockname.name);
+		status = DLM_RECOVERING;
 	}
 bail:
 	spin_unlock(&res->spinlock);

From eae2b56828230fe326167d67a0ec6e777e69afee Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@huawei.com>
Date: Fri, 25 Mar 2016 14:21:29 -0700
Subject: [PATCH 298/797] ocfs2/dlm: fix BUG in
 dlm_move_lockres_to_recovery_list

commit be12b299a83fc807bbaccd2bcb8ec50cbb0cb55c upstream.

When the master handles a convert request, it queues the ast first and
then returns the status.  The ast may be sent before the request status,
because the two messages are sent by two different threads.  If the
master goes down right after the ast is sent, it may trigger a BUG in
dlm_move_lockres_to_recovery_list on the requesting node, because the
ast handler moves the lock to the grant list without clearing
lock->convert_pending.  So remove the BUG_ON statement and check in
dlmconvert_remote whether the ast has already been processed.

Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Tariq Saeed <tariq.x.saeed@oracle.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlm/dlmconvert.c  | 13 +++++++++++++
 fs/ocfs2/dlm/dlmrecovery.c |  1 -
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 84de55ed865a..f90931335c6b 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -288,6 +288,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 		status = DLM_DENIED;
 		goto bail;
 	}
+
+	if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) {
+		mlog(0, "last convert request returned DLM_RECOVERING, but "
+		     "owner has already queued and sent ast to me. res %.*s, "
+		     "(cookie=%u:%llu, type=%d, conv=%d)\n",
+		     res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+		     lock->ml.type, lock->ml.convert_type);
+		status = DLM_NORMAL;
+		goto bail;
+	}
+
 	res->state |= DLM_LOCK_RES_IN_PROGRESS;
 	/* move lock to local convert queue */
 	/* do not alter lock refcount.  switching lists. */
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 42f0cae93a0a..4a338803e7e9 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2064,7 +2064,6 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 			dlm_lock_get(lock);
 			if (lock->convert_pending) {
 				/* move converting lock back to granted */
-				BUG_ON(i != DLM_CONVERTING_LIST);
 				mlog(0, "node died with convert pending "
 				     "on %.*s. move back to granted list.\n",
 				     res->lockname.len, res->lockname.name);

From 5dc7e939b6b8ca3d18d5814504954014578703e2 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 25 Mar 2016 14:21:50 -0700
Subject: [PATCH 299/797] mm/page_alloc: prevent merging between isolated and
 other pageblocks

commit d9dddbf556674bf125ecd925b24e43a5cf2a568a upstream.

Hanjun Guo has reported that a CMA stress test causes broken accounting of
CMA and free pages:

> Before the test, I got:
> -bash-4.3# cat /proc/meminfo | grep Cma
> CmaTotal:         204800 kB
> CmaFree:          195044 kB
>
>
> After running the test:
> -bash-4.3# cat /proc/meminfo | grep Cma
> CmaTotal:         204800 kB
> CmaFree:         6602584 kB
>
> So the freed CMA memory is more than total..
>
> Also the MemFree is more than mem total:
>
> -bash-4.3# cat /proc/meminfo
> MemTotal:       16342016 kB
> MemFree:        22367268 kB
> MemAvailable:   22370528 kB

Laura Abbott has confirmed the issue and suspected the freepage accounting
rewrite around 3.18/4.0 by Joonsoo Kim.  Joonsoo had a theory that this is
caused by unexpected merging between MIGRATE_ISOLATE and MIGRATE_CMA
pageblocks:

> CMA isolates MAX_ORDER aligned blocks, but, during the process,
> partially isolated blocks exist. If MAX_ORDER is 11 and
> pageblock_order is 9, two pageblocks make up a MAX_ORDER
> aligned block and I can think of the following scenario because
> pageblock (un)isolation would be done one by one.
>
> (each character means one pageblock. 'C', 'I' means MIGRATE_CMA,
> MIGRATE_ISOLATE, respectively.)
>
> CC -> IC -> II (Isolation)
> II -> CI -> CC (Un-isolation)
>
> If some pages are freed at this intermediate state such as IC or CI,
> that page could be merged to the other page that is resident on
> different type of pageblock and it will cause wrong freepage count.

This was supposed to be prevented by CMA operating on MAX_ORDER blocks,
but since it doesn't hold the zone->lock between pageblocks, a race
window does exist.

It's also likely that unexpected merging can occur between
MIGRATE_ISOLATE and non-CMA pageblocks.  This should be prevented in
__free_one_page() since commit 3c605096d315 ("mm/page_alloc: restrict
max order of merging on isolated pageblock").  However, we only check
the migratetype of the pageblock where buddy merging has been initiated,
not the migratetype of the buddy pageblock (or group of pageblocks)
which can be MIGRATE_ISOLATE.

Joonsoo has suggested checking for buddy migratetype as part of
page_is_buddy(), but that would add extra checks in allocator hotpath
and bloat-o-meter has shown significant code bloat (the function is
inline).

This patch reduces the bloat at some expense of more complicated code.
The buddy-merging while-loop in __free_one_page() is initially bounded
to pageblock_order and without any migratetype checks.  The checks are
placed outside, bumping the max_order if merging is allowed, and
returning to the while-loop with a statement which can't possibly be
considered harmful.

This fixes the accounting bug and also removes the arguably weird state
in the original commit 3c605096d315 where buddies could be left
unmerged.

Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock")
Link: https://lkml.org/lkml/2016/3/2/280
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Hanjun Guo <guohanjun@huawei.com>
Tested-by: Hanjun Guo <guohanjun@huawei.com>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Debugged-by: Laura Abbott <labbott@redhat.com>
Debugged-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_alloc.c | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9d666df5ef95..c69531afbd8f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -662,34 +662,28 @@ static inline void __free_one_page(struct page *page,
 	unsigned long combined_idx;
 	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
-	unsigned int max_order = MAX_ORDER;
+	unsigned int max_order;
+
+	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
 	VM_BUG_ON(!zone_is_initialized(zone));
 	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
 
 	VM_BUG_ON(migratetype == -1);
-	if (is_migrate_isolate(migratetype)) {
-		/*
-		 * We restrict max order of merging to prevent merge
-		 * between freepages on isolate pageblock and normal
-		 * pageblock. Without this, pageblock isolation
-		 * could cause incorrect freepage accounting.
-		 */
-		max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
-	} else {
+	if (likely(!is_migrate_isolate(migratetype)))
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
-	}
 
-	page_idx = pfn & ((1 << max_order) - 1);
+	page_idx = pfn & ((1 << MAX_ORDER) - 1);
 
 	VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
 	VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
+continue_merging:
 	while (order < max_order - 1) {
 		buddy_idx = __find_buddy_index(page_idx, order);
 		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
-			break;
+			goto done_merging;
 		/*
 		 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
 		 * merge with it and move up one order.
@@ -706,6 +700,32 @@ static inline void __free_one_page(struct page *page,
 		page_idx = combined_idx;
 		order++;
 	}
+	if (max_order < MAX_ORDER) {
+		/* If we are here, it means order is >= pageblock_order.
+		 * We want to prevent merge between freepages on isolate
+		 * pageblock and normal pageblock. Without this, pageblock
+		 * isolation could cause incorrect freepage or CMA accounting.
+		 *
+		 * We don't want to hit this code for the more frequent
+		 * low-order merging.
+		 */
+		if (unlikely(has_isolate_pageblock(zone))) {
+			int buddy_mt;
+
+			buddy_idx = __find_buddy_index(page_idx, order);
+			buddy = page + (buddy_idx - page_idx);
+			buddy_mt = get_pageblock_migratetype(buddy);
+
+			if (migratetype != buddy_mt
+					&& (is_migrate_isolate(migratetype) ||
+						is_migrate_isolate(buddy_mt)))
+				goto done_merging;
+		}
+		max_order++;
+		goto continue_merging;
+	}
+
+done_merging:
 	set_page_order(page, order);
 
 	/*

From ded1db97aca15db72850c78ad222625386af4ea4 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sat, 20 Feb 2016 22:27:48 +0200
Subject: [PATCH 300/797] mtd: onenand: fix deadlock in onenand_block_markbad

commit 5e64c29e98bfbba1b527b0a164f9493f3db9e8cb upstream.

Commit 5942ddbc500d ("mtd: introduce mtd_block_markbad interface")
incorrectly changed onenand_block_markbad() to call mtd_block_markbad
instead of onenand_chip's block_markbad function. As a result the function
will now recurse and deadlock. Fix by reverting the change.

Fixes: 5942ddbc500d ("mtd: introduce mtd_block_markbad interface")
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Acked-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/onenand/onenand_base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 43b3392ffee7..652d01832873 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -2599,6 +2599,7 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
  */
 static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
+	struct onenand_chip *this = mtd->priv;
 	int ret;
 
 	ret = onenand_block_isbad(mtd, ofs);
@@ -2610,7 +2611,7 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	}
 
 	onenand_get_device(mtd, FL_WRITING);
-	ret = mtd_block_markbad(mtd, ofs);
+	ret = this->block_markbad(mtd, ofs);
 	onenand_release_device(mtd);
 	return ret;
 }

From 79d05ce04bfbe9885936ed985c2dd53d8500f617 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sun, 13 Mar 2016 00:33:48 -0500
Subject: [PATCH 301/797] intel_idle: prevent SKL-H boot failure when C8+C9+C10
 enabled

commit d70e28f57e14a481977436695b0c9ba165472431 upstream.

Some SKL-H configurations require "intel_idle.max_cstate=7" to boot.
While that is an effective workaround, it disables C10.

This patch detects the problematic configuration,
and disables C8 and C9, keeping C10 enabled.

Note that enabling SGX in BIOS SETUP can also prevent this issue,
if the system BIOS provides that option.

https://bugzilla.kernel.org/show_bug.cgi?id=109081
"Freezes with Intel i7 6700HQ (Skylake), unless intel_idle.max_cstate=7"

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/idle/intel_idle.c | 112 ++++++++++++++++++++++++++++++--------
 1 file changed, 88 insertions(+), 24 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cd4510a63375..146eed70bdf4 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -65,7 +65,7 @@
 #include <asm/mwait.h>
 #include <asm/msr.h>
 
-#define INTEL_IDLE_VERSION "0.4"
+#define INTEL_IDLE_VERSION "0.4.1"
 #define PREFIX "intel_idle: "
 
 static struct cpuidle_driver intel_idle_driver = {
@@ -993,37 +993,93 @@ static void intel_idle_cpuidle_devices_uninit(void)
 	return;
 }
 
+/*
+ * ivt_idle_state_table_update(void)
+ *
+ * Tune IVT multi-socket targets
+ * Assumption: num_sockets == (max_package_num + 1)
+ */
+static void ivt_idle_state_table_update(void)
+{
+	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
+	int cpu, package_num, num_sockets = 1;
+
+	for_each_online_cpu(cpu) {
+		package_num = topology_physical_package_id(cpu);
+		if (package_num + 1 > num_sockets) {
+			num_sockets = package_num + 1;
+
+			if (num_sockets > 4) {
+				cpuidle_state_table = ivt_cstates_8s;
+				return;
+			}
+		}
+	}
+
+	if (num_sockets > 2)
+		cpuidle_state_table = ivt_cstates_4s;
+
+	/* else, 1 and 2 socket systems use default ivt_cstates */
+}
+/*
+ * sklh_idle_state_table_update(void)
+ *
+ * On SKL-H (model 0x5e) disable C8 and C9 if:
+ * C10 is enabled and SGX disabled
+ */
+static void sklh_idle_state_table_update(void)
+{
+	unsigned long long msr;
+	unsigned int eax, ebx, ecx, edx;
+
+
+	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
+	if (max_cstate <= 7)
+		return;
+
+	/* if PC10 not present in CPUID.MWAIT.EDX */
+	if ((mwait_substates & (0xF << 28)) == 0)
+		return;
+
+	rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
+
+	/* PC10 is not enabled in PKG C-state limit */
+	if ((msr & 0xF) != 8)
+		return;
+
+	ecx = 0;
+	cpuid(7, &eax, &ebx, &ecx, &edx);
+
+	/* if SGX is present */
+	if (ebx & (1 << 2)) {
+
+		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+
+		/* if SGX is enabled */
+		if (msr & (1 << 18))
+			return;
+	}
+
+	skl_cstates[5].disabled = 1;	/* C8-SKL */
+	skl_cstates[6].disabled = 1;	/* C9-SKL */
+}
 /*
  * intel_idle_state_table_update()
  *
  * Update the default state_table for this CPU-id
- *
- * Currently used to access tuned IVT multi-socket targets
- * Assumption: num_sockets == (max_package_num + 1)
  */
-void intel_idle_state_table_update(void)
+
+static void intel_idle_state_table_update(void)
 {
-	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
-	if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
-		int cpu, package_num, num_sockets = 1;
+	switch (boot_cpu_data.x86_model) {
 
-		for_each_online_cpu(cpu) {
-			package_num = topology_physical_package_id(cpu);
-			if (package_num + 1 > num_sockets) {
-				num_sockets = package_num + 1;
-
-				if (num_sockets > 4) {
-					cpuidle_state_table = ivt_cstates_8s;
-					return;
-				}
-			}
-		}
-
-		if (num_sockets > 2)
-			cpuidle_state_table = ivt_cstates_4s;
-		/* else, 1 and 2 socket systems use default ivt_cstates */
+	case 0x3e: /* IVT */
+		ivt_idle_state_table_update();
+		break;
+	case 0x5e: /* SKL-H */
+		sklh_idle_state_table_update();
+		break;
 	}
-	return;
 }
 
 /*
@@ -1063,6 +1119,14 @@ static int __init intel_idle_cpuidle_driver_init(void)
 		if (num_substates == 0)
 			continue;
 
+		/* if state marked as disabled, skip it */
+		if (cpuidle_state_table[cstate].disabled != 0) {
+			pr_debug(PREFIX "state %s is disabled",
+				cpuidle_state_table[cstate].name);
+			continue;
+		}
+
+
 		if (((mwait_cstate + 1) > 2) &&
 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			mark_tsc_unstable("TSC halts in idle"

From 4cd4ebbdf533ed316ce377b66ae508cc6d1d0162 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Wed, 23 Mar 2016 00:11:20 +0100
Subject: [PATCH 302/797] PM / sleep: Clear pm_suspend_global_flags upon
 hibernate

commit 276142730c39c9839465a36a90e5674a8c34e839 upstream.

When suspending to RAM, waking up and later suspending to disk,
we gratuitously runtime resume devices after the thaw phase.
This does not occur if we always suspend to RAM or always to disk.

pm_complete_with_resume_check(), which gets called from
pci_pm_complete() among others, schedules a runtime resume
if PM_SUSPEND_FLAG_FW_RESUME is set. The flag is set during
a suspend-to-RAM cycle. It is cleared at the beginning of
the suspend-to-RAM cycle but not afterwards and it is not
cleared during a suspend-to-disk cycle at all. Fix it.

Fixes: ef25ba047601 (PM / sleep: Add flags to indicate platform firmware involvement)
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/hibernate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index b7342a24f559..b7dd5718836e 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -339,6 +339,7 @@ int hibernation_snapshot(int platform_mode)
 	pm_message_t msg;
 	int error;
 
+	pm_suspend_clear_flags();
 	error = platform_begin(platform_mode);
 	if (error)
 		goto Close;

From 9835db39bb8151cf4471a792a878318c8d07bf4d Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 18 Mar 2016 14:55:38 +0100
Subject: [PATCH 303/797] scsi_common: do not clobber fixed sense information

commit ba08311647892cc7912de74525fd78416caf544a upstream.

For fixed sense the information field is 32 bits, so we need to truncate
the information field to avoid clobbering the sense code.

Fixes: a1524f226a02 ("libata-eh: Set 'information' field for autosense")
Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Lee Duncan <lduncan@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/scsi_common.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c
index c126966130ab..ce79de822e46 100644
--- a/drivers/scsi/scsi_common.c
+++ b/drivers/scsi/scsi_common.c
@@ -278,8 +278,16 @@ int scsi_set_sense_information(u8 *buf, int buf_len, u64 info)
 		ucp[3] = 0;
 		put_unaligned_be64(info, &ucp[4]);
 	} else if ((buf[0] & 0x7f) == 0x70) {
-		buf[0] |= 0x80;
-		put_unaligned_be64(info, &buf[3]);
+		/*
+		 * Only set the 'VALID' bit if we can represent the value
+		 * correctly; otherwise just fill out the lower bytes and
+		 * clear the 'VALID' flag.
+		 */
+		if (info <= 0xffffffffUL)
+			buf[0] |= 0x80;
+		else
+			buf[0] &= 0x7f;
+		put_unaligned_be32((u32)info, &buf[3]);
 	}
 
 	return 0;

From 2a8225ef46968444fb1c4c632ec28e4cc2be633f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 4 Mar 2016 15:59:42 +0100
Subject: [PATCH 304/797] sched/cputime: Fix steal time accounting vs. CPU
 hotplug

commit e9532e69b8d1d1284e8ecf8d2586de34aec61244 upstream.

On CPU hotplug the steal time accounting can keep a stale rq->prev_steal_time
value over CPU down and up. So after the CPU comes up again the delta
calculation in steal_account_process_tick() wreckages itself due to the
unsigned math:

	 u64 steal = paravirt_steal_clock(smp_processor_id());

	 steal -= this_rq()->prev_steal_time;

So if steal is smaller than rq->prev_steal_time we end up with an insane large
value which then gets added to rq->prev_steal_time, resulting in a permanent
wreckage of the accounting. As a consequence the per CPU stats in /proc/stat
become stale.

Nice trick to tell the world how idle the system is (100%) while the CPU is
100% busy running tasks. Though we prefer realistic numbers.

None of the accounting values which use a previous value to account for
fractions is reset at CPU hotplug time. update_rq_clock_task() has a sanity
check for prev_irq_time and prev_steal_time_rq, but that sanity check solely
deals with clock warps and limits the /proc/stat visible wreckage. The
prev_time values are still wrong.

Solution is simple: Reset rq->prev_*_time when the CPU is plugged in again.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: commit 095c0aa83e52 "sched: adjust scheduler cpu power for stolen time"
Fixes: commit aa483808516c "sched: Remove irq time from available CPU power"
Fixes: commit e6e6685accfa "KVM guest: Steal time accounting"
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1603041539490.3686@nanos
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/core.c  |  1 +
 kernel/sched/sched.h | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index eb70592f03f6..70e5e09341f1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5525,6 +5525,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 	case CPU_UP_PREPARE:
 		rq->calc_load_update = calc_load_update;
+		account_reset_rq(rq);
 		break;
 
 	case CPU_ONLINE:
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b242775bf670..0517abd7dd73 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1770,3 +1770,16 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+static inline void account_reset_rq(struct rq *rq)
+{
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	rq->prev_irq_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT
+	rq->prev_steal_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	rq->prev_steal_time_rq = 0;
+#endif
+}

From d2b56a0758ead5987db9465e120643ede759f3ad Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 3 Mar 2016 20:50:41 +0100
Subject: [PATCH 305/797] perf/x86/pebs: Add workaround for broken OVFL status
 on HSW+

commit 8077eca079a212f26419c57226f28696b7100683 upstream.

This patch fixes an issue with the GLOBAL_OVERFLOW_STATUS bits on
Haswell, Broadwell and Skylake processors when using PEBS.

The SDM stipulates that when the PEBS interrupt threshold is crossed,
an interrupt is posted and the kernel is interrupted. The kernel will
find GLOBAL_OVF_STATUS bit 62 set indicating there are PEBS records to
drain. But the bits corresponding to the actual counters should NOT be
set. The kernel follows the SDM and assumes that all PEBS events are
processed in the drain_pebs() callback. The kernel then checks for
remaining overflows on any other (non-PEBS) events and processes these
in the for_each_bit_set(&status) loop.

As it turns out, under certain conditions on HSW and later processors,
on PEBS buffer interrupt, bit 62 is set but the counter bits may be
set as well. In that case, the kernel drains PEBS and generates
SAMPLES with the EXACT tag, then it processes the counter bits, and
generates normal (non-EXACT) SAMPLES.

I ran into this problem by trying to understand why on HSW sampling on
a PEBS event was sometimes returning SAMPLES without the EXACT tag.
This should not happen on user level code because HSW has the
eventing_ip which always point to the instruction that caused the
event.

The workaround in this patch simply ensures that the bits for the
counters used for PEBS events are cleared after the PEBS buffer has
been drained. With this fix 100% of the PEBS samples on my user code
report the EXACT tag.

Before:
  $ perf record -e cpu/event=0xd0,umask=0x81/upp ./multichase
  $ perf report -D | fgrep SAMPLES
  PERF_RECORD_SAMPLE(IP, 0x2): 11775/11775: 0x406de5 period: 73469 addr: 0 exact=Y
                           \--- EXACT tag is missing

After:
  $ perf record -e cpu/event=0xd0,umask=0x81/upp ./multichase
  $ perf report -D | fgrep SAMPLES
  PERF_RECORD_SAMPLE(IP, 0x4002): 11775/11775: 0x406de5 period: 73469 addr: 0 exact=Y
                           \--- EXACT tag is set

The problem tends to appear more often when multiple PEBS events are used.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/1457034642-21837-3-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e2a430021e46..ad6768f97b08 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1840,6 +1840,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
 		handled++;
 		x86_pmu.drain_pebs(regs);
+		/*
+		 * There are cases where, even though, the PEBS ovfl bit is set
+		 * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+		 * overflow bits set for their counters. We must clear them
+		 * here because they have been processed as exact samples in
+		 * the drain_pebs() routine. They must not be processed again
+		 * in the for_each_bit_set() loop for regular samples below.
+		 */
+		status &= ~cpuc->pebs_enabled;
+		status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
 	}
 
 	/*

From 886629ebb2acaeafc3102140c1c8b4ad52792484 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 3 Mar 2016 18:07:28 -0500
Subject: [PATCH 306/797] perf/x86/intel: Fix PEBS warning by only restoring
 active PMU in pmi

commit c3d266c8a9838cc141b69548bc3b1b18808ae8c4 upstream.

This patch tries to fix a PEBS warning found in my stress test. The
following perf command can easily trigger the pebs warning or spurious
NMI error on Skylake/Broadwell/Haswell platforms:

  sudo perf record -e 'cpu/umask=0x04,event=0xc4/pp,cycles,branches,ref-cycles,cache-misses,cache-references' --call-graph fp -b -c1000 -a

Also the NMI watchdog must be enabled.

In this case, the number of events is larger than the number of
counters, so perf has to do multiplexing.

In perf_mux_hrtimer_handler, it does perf_pmu_disable(), schedule out
old events, rotate_ctx, schedule in new events and finally
perf_pmu_enable().

If the old events include a precise event, MSR_IA32_PEBS_ENABLE
should be cleared in perf_pmu_disable().  MSR_IA32_PEBS_ENABLE should
stay 0 until perf_pmu_enable() is called and the new event is a
precise event.

However, there is a corner case which could restore PEBS_ENABLE to a
stale value during the above period. In perf_pmu_disable(), GLOBAL_CTRL
will be set to 0 to stop overflows and the PMIs that follow them. But
there may be a pending PMI from an earlier overflow, which cannot be
stopped. So even when GLOBAL_CTRL is cleared, it is still possible for
the kernel to get a PMI. At the end of the PMI handler,
__intel_pmu_enable_all() will be called, which will restore the stale
values if the old events haven't been scheduled out.

Once the stale pebs value is set, it cannot be corrected if the new
events are non-precise, because pebs_enabled will be set to 0 and
x86_pmu.enable_all() will ignore the MSR_IA32_PEBS_ENABLE setting. As a
result, the next NMI with the stale PEBS_ENABLE triggers the pebs
warning.

The pending PMI after enabled=0 will become harmless if the NMI handler
does not change the state. This patch checks cpuc->enabled in the PMI
handler and only restores the state when the PMU is active.

Here is the dump:

  Call Trace:
   <NMI>  [<ffffffff813c3a2e>] dump_stack+0x63/0x85
   [<ffffffff810a46f2>] warn_slowpath_common+0x82/0xc0
   [<ffffffff810a483a>] warn_slowpath_null+0x1a/0x20
   [<ffffffff8100fe2e>] intel_pmu_drain_pebs_nhm+0x2be/0x320
   [<ffffffff8100caa9>] intel_pmu_handle_irq+0x279/0x460
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   [<ffffffff811f290d>] ? vunmap_page_range+0x20d/0x330
   [<ffffffff811f2f11>] ?  unmap_kernel_range_noflush+0x11/0x20
   [<ffffffff8148379f>] ? ghes_copy_tofrom_phys+0x10f/0x2a0
   [<ffffffff814839c8>] ? ghes_read_estatus+0x98/0x170
   [<ffffffff81005a7d>] perf_event_nmi_handler+0x2d/0x50
   [<ffffffff810310b9>] nmi_handle+0x69/0x120
   [<ffffffff810316f6>] default_do_nmi+0xe6/0x100
   [<ffffffff810317f2>] do_nmi+0xe2/0x130
   [<ffffffff817aea71>] end_repeat_nmi+0x1a/0x1e
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   <<EOE>>  <IRQ>  [<ffffffff81006df8>] ?  x86_perf_event_set_period+0xd8/0x180
   [<ffffffff81006eec>] x86_pmu_start+0x4c/0x100
   [<ffffffff8100722d>] x86_pmu_enable+0x28d/0x300
   [<ffffffff811994d7>] perf_pmu_enable.part.81+0x7/0x10
   [<ffffffff8119cb70>] perf_mux_hrtimer_handler+0x200/0x280
   [<ffffffff8119c970>] ?  __perf_install_in_context+0xc0/0xc0
   [<ffffffff8110f92d>] __hrtimer_run_queues+0xfd/0x280
   [<ffffffff811100d8>] hrtimer_interrupt+0xa8/0x190
   [<ffffffff81199080>] ?  __perf_read_group_add.part.61+0x1a0/0x1a0
   [<ffffffff81051bd8>] local_apic_timer_interrupt+0x38/0x60
   [<ffffffff817af01d>] smp_apic_timer_interrupt+0x3d/0x50
   [<ffffffff817ad15c>] apic_timer_interrupt+0x8c/0xa0
   <EOI>  [<ffffffff81199080>] ?  __perf_read_group_add.part.61+0x1a0/0x1a0
   [<ffffffff81123de5>] ?  smp_call_function_single+0xd5/0x130
   [<ffffffff81123ddb>] ?  smp_call_function_single+0xcb/0x130
   [<ffffffff81199080>] ?  __perf_read_group_add.part.61+0x1a0/0x1a0
   [<ffffffff8119765a>] event_function_call+0x10a/0x120
   [<ffffffff8119c660>] ? ctx_resched+0x90/0x90
   [<ffffffff811971e0>] ? cpu_clock_event_read+0x30/0x30
   [<ffffffff811976d0>] ? _perf_event_disable+0x60/0x60
   [<ffffffff8119772b>] _perf_event_enable+0x5b/0x70
   [<ffffffff81197388>] perf_event_for_each_child+0x38/0xa0
   [<ffffffff811976d0>] ? _perf_event_disable+0x60/0x60
   [<ffffffff811a0ffd>] perf_ioctl+0x12d/0x3c0
   [<ffffffff8134d855>] ? selinux_file_ioctl+0x95/0x1e0
   [<ffffffff8124a3a1>] do_vfs_ioctl+0xa1/0x5a0
   [<ffffffff81036d29>] ? sched_clock+0x9/0x10
   [<ffffffff8124a919>] SyS_ioctl+0x79/0x90
   [<ffffffff817ac4b2>] entry_SYSCALL_64_fastpath+0x1a/0xa4
  ---[ end trace aef202839fe9a71d ]---
  Uhhuh. NMI received for unknown reason 2d on CPU 2.
  Do you have a strange power saving mode enabled?

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1457046448-6184-1-git-send-email-kan.liang@intel.com
[ Fixed various typos and other small details. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event.c       | 13 +++++++++++++
 arch/x86/kernel/cpu/perf_event_intel.c | 15 +++++++++++++--
 arch/x86/kernel/cpu/perf_event_knc.c   |  4 +++-
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bf79d7c97df..a3aeb2cc361e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -593,6 +593,19 @@ void x86_pmu_disable_all(void)
 	}
 }
 
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
 static void x86_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ad6768f97b08..98a01fd16c8c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1458,7 +1458,15 @@ static __initconst const u64 slm_hw_cache_event_ids
 };
 
 /*
- * Use from PMIs where the LBRs are already disabled.
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1895,7 +1903,10 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 		goto again;
 
 done:
-	__intel_pmu_enable_all(0, true);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		__intel_pmu_enable_all(0, true);
+
 	/*
 	 * Only unmask the NMI after the overflow counters
 	 * have been reset. This avoids spurious NMIs on
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index 5b0c232d1ee6..b931095e86d4 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -263,7 +263,9 @@ static int knc_pmu_handle_irq(struct pt_regs *regs)
 		goto again;
 
 done:
-	knc_pmu_enable_all(0);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		knc_pmu_enable_all(0);
 
 	return handled;
 }

From a54af124cd73a1429ad5a9d16ab878c71e367bf8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 1 Mar 2016 20:03:52 +0100
Subject: [PATCH 307/797] perf/x86/intel: Use PAGE_SIZE for PEBS buffer size on
 Core2

commit e72daf3f4d764c47fb71c9bdc7f9c54a503825b1 upstream.

Using >PAGE_SIZE buffers makes the WRMSR to PERF_GLOBAL_CTRL in
intel_pmu_enable_all() mysteriously hang on Core2. As a workaround, we
don't do this.

The hard lockup is easily triggered by running 'perf test attr'
repeatedly. Most of the time it gets stuck on sample session with
small periods.

  # perf test attr -vv
  14: struct perf_event_attr setup                             :
  --- start ---
  ...
    'PERF_TEST_ATTR=/tmp/tmpuEKz3B /usr/bin/perf record -o /tmp/tmpuEKz3B/perf.data -c 123 kill >/dev/null 2>&1' ret 1

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/20160301190352.GA8355@krava.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event.h          |  1 +
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 13 +++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d0e35ebb2adb..0d7bc499d3eb 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -591,6 +591,7 @@ struct x86_pmu {
 			pebs_active	:1,
 			pebs_broken	:1;
 	int		pebs_record_size;
+	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5db1c7755548..4bf080b4f744 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -269,7 +269,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
+	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -286,7 +286,7 @@ static int alloc_pebs_buffer(int cpu)
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
 
-	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
 	ds->pebs_index = ds->pebs_buffer_base;
@@ -1296,6 +1296,7 @@ void __init intel_ds_init(void)
 
 	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
 	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
 	if (x86_pmu.pebs) {
 		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
 		int format = x86_pmu.intel_cap.pebs_format;
@@ -1304,6 +1305,14 @@ void __init intel_ds_init(void)
 		case 0:
 			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+			/*
+			 * Using >PAGE_SIZE buffers makes the WRMSR to
+			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+			 * mysteriously hang on Core2.
+			 *
+			 * As a workaround, we don't do this.
+			 */
+			x86_pmu.pebs_buffer_size = PAGE_SIZE;
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
 			break;
 

From 4b3d06d989b9535bb04f9339a862d06e3311522a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 1 Mar 2016 14:25:24 -0800
Subject: [PATCH 308/797] perf/x86/intel: Fix PEBS data source interpretation
 on Nehalem/Westmere

commit e17dc65328057c00db7e1bfea249c8771a78b30b upstream.

Jiri reported some time ago that some entries in the PEBS data source table
in perf do not agree with the SDM. We investigated and the bits
changed for Sandy Bridge, but the SDM was not updated.

perf already implements the bits correctly for Sandy Bridge
and later. This patch patches it up for Nehalem and Westmere.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jolsa@kernel.org
Link: http://lkml.kernel.org/r/1456871124-15985-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event.h          |  2 ++
 arch/x86/kernel/cpu/perf_event_intel.c    |  2 ++
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 11 ++++++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 0d7bc499d3eb..ee70445fbb1f 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -908,6 +908,8 @@ void intel_pmu_lbr_init_hsw(void);
 
 void intel_pmu_lbr_init_skl(void);
 
+void intel_pmu_pebs_data_source_nhm(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 98a01fd16c8c..078de2e86b7a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -3336,6 +3336,7 @@ __init int intel_pmu_init(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		intel_pmu_pebs_data_source_nhm();
 		x86_add_quirk(intel_nehalem_quirk);
 
 		pr_cont("Nehalem events, ");
@@ -3398,6 +3399,7 @@ __init int intel_pmu_init(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		intel_pmu_pebs_data_source_nhm();
 		pr_cont("Westmere events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4bf080b4f744..7abb2b88572e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -51,7 +51,8 @@ union intel_x86_pebs_dse {
 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
 
-static const u64 pebs_data_source[] = {
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
 	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
 	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
 	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
@@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = {
 	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+	pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
+	pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;

From b40108b826ed9e1c558f73b9dbabb8d80ded268b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 12 Apr 2016 09:09:26 -0700
Subject: [PATCH 309/797] Linux 4.4.7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 87d12b44ab66..5a493e785aca 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 6
+SUBLEVEL = 7
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 63c22e8fe29efe1d03980d5cf933c4d7b9a72d09 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 26 Mar 2016 12:28:05 -0700
Subject: [PATCH 310/797] hwmon: (max1111) Return -ENODEV from
 max1111_read_channel if not instantiated

commit 3c2e2266a5bd2d1cef258e6e54dca1d99946379f upstream.

arm:pxa_defconfig can result in the following crash if the max1111 driver
is not instantiated.

Unhandled fault: page domain fault (0x01b) at 0x00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: : 1b [#1] PREEMPT ARM
Modules linked in:
CPU: 0 PID: 300 Comm: kworker/0:1 Not tainted 4.5.0-01301-g1701f680407c #10
Hardware name: SHARP Akita
Workqueue: events sharpsl_charge_toggle
task: c390a000 ti: c391e000 task.ti: c391e000
PC is at max1111_read_channel+0x20/0x30
LR is at sharpsl_pm_pxa_read_max1111+0x2c/0x3c
pc : [<c03aaab0>]    lr : [<c0024b50>]    psr: 20000013
...
[<c03aaab0>] (max1111_read_channel) from [<c0024b50>]
					(sharpsl_pm_pxa_read_max1111+0x2c/0x3c)
[<c0024b50>] (sharpsl_pm_pxa_read_max1111) from [<c00262e0>]
					(spitzpm_read_devdata+0x5c/0xc4)
[<c00262e0>] (spitzpm_read_devdata) from [<c0024094>]
					(sharpsl_check_battery_temp+0x78/0x110)
[<c0024094>] (sharpsl_check_battery_temp) from [<c0024f9c>]
					(sharpsl_charge_toggle+0x48/0x110)
[<c0024f9c>] (sharpsl_charge_toggle) from [<c004429c>]
					(process_one_work+0x14c/0x48c)
[<c004429c>] (process_one_work) from [<c0044618>] (worker_thread+0x3c/0x5d4)
[<c0044618>] (worker_thread) from [<c004a238>] (kthread+0xd0/0xec)
[<c004a238>] (kthread) from [<c000a670>] (ret_from_fork+0x14/0x24)

This can occur because the SPI controller driver (SPI_PXA2XX) is built as
a module and thus not necessarily loaded. While building SPI_PXA2XX into the
kernel would make the problem disappear, it appears prudent to ensure that
the driver is instantiated before accessing its data structures.

Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/max1111.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c
index 36544c4f653c..303d0c9df907 100644
--- a/drivers/hwmon/max1111.c
+++ b/drivers/hwmon/max1111.c
@@ -85,6 +85,9 @@ static struct max1111_data *the_max1111;
 
 int max1111_read_channel(int channel)
 {
+	if (!the_max1111 || !the_max1111->spi)
+		return -ENODEV;
+
 	return max1111_read(&the_max1111->spi->dev, channel);
 }
 EXPORT_SYMBOL(max1111_read_channel);
@@ -258,6 +261,9 @@ static int max1111_remove(struct spi_device *spi)
 {
 	struct max1111_data *data = spi_get_drvdata(spi);
 
+#ifdef CONFIG_SHARPSL_PM
+	the_max1111 = NULL;
+#endif
 	hwmon_device_unregister(data->hwmon_dev);
 	sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group);
 	sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);

From 19c1764a19cdb41afebc2e66d7a75a7064c0000f Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nicstange@gmail.com>
Date: Sun, 20 Mar 2016 23:23:46 +0100
Subject: [PATCH 311/797] PKCS#7: pkcs7_validate_trust(): initialize the
 _trusted output argument

commit e54358915d0a00399c11c2c23ae1be674cba188a upstream.

Despite what the DocBook comment to pkcs7_validate_trust() says, the
*_trusted argument is never set to false.

pkcs7_validate_trust() only positively sets *_trusted upon encountering
a trusted PKCS#7 SignedInfo block.

This is quite unfortunate since its callers, system_verify_data() for
example, depend on pkcs7_validate_trust() clearing *_trusted on non-trust.

Indeed, UBSAN splats when attempting to load the uninitialized local
variable 'trusted' from system_verify_data() in pkcs7_validate_trust():

  UBSAN: Undefined behaviour in crypto/asymmetric_keys/pkcs7_trust.c:194:14
  load of value 82 is not a valid value for type '_Bool'
  [...]
  Call Trace:
    [<ffffffff818c4d35>] dump_stack+0xbc/0x117
    [<ffffffff818c4c79>] ? _atomic_dec_and_lock+0x169/0x169
    [<ffffffff8194113b>] ubsan_epilogue+0xd/0x4e
    [<ffffffff819419fa>] __ubsan_handle_load_invalid_value+0x111/0x158
    [<ffffffff819418e9>] ? val_to_string.constprop.12+0xcf/0xcf
    [<ffffffff818334a4>] ? x509_request_asymmetric_key+0x114/0x370
    [<ffffffff814b83f0>] ? kfree+0x220/0x370
    [<ffffffff818312c2>] ? public_key_verify_signature_2+0x32/0x50
    [<ffffffff81835e04>] pkcs7_validate_trust+0x524/0x5f0
    [<ffffffff813c391a>] system_verify_data+0xca/0x170
    [<ffffffff813c3850>] ? top_trace_array+0x9b/0x9b
    [<ffffffff81510b29>] ? __vfs_read+0x279/0x3d0
    [<ffffffff8129372f>] mod_verify_sig+0x1ff/0x290
    [...]

The implication is that pkcs7_validate_trust() effectively grants trust
when it really shouldn't have.

Fix this by explicitly setting *_trusted to false at the very beginning
of pkcs7_validate_trust().

Signed-off-by: Nicolai Stange <nicstange@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/pkcs7_trust.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index 90d6d47965b0..ecdb5a2ce085 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c
@@ -178,6 +178,8 @@ int pkcs7_validate_trust(struct pkcs7_message *pkcs7,
 	int cached_ret = -ENOKEY;
 	int ret;
 
+	*_trusted = false;
+
 	for (p = pkcs7->certs; p; p = p->next)
 		p->seen = false;
 

From 7cdf5d71b408f110657f2f441f7d37c2ebde2839 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 8 Apr 2016 18:11:33 +0200
Subject: [PATCH 312/797] parisc: Avoid function pointers for kernel exception
 routines

commit e3893027a300927049efc1572f852201eb785142 upstream.

We want to prevent the kernel module loader from creating function pointers
for the kernel fixup routines of get_user() and put_user(). Changing
the external reference from function type to int type fixes this.

This unbreaks exception handling for get_user() and put_user() when
called from a kernel module.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/kernel/parisc_ksyms.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 568b2c61ea02..3cad8aadc69e 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -47,11 +47,11 @@ EXPORT_SYMBOL(__cmpxchg_u64);
 EXPORT_SYMBOL(lclear_user);
 EXPORT_SYMBOL(lstrnlen_user);
 
-/* Global fixups */
-extern void fixup_get_user_skip_1(void);
-extern void fixup_get_user_skip_2(void);
-extern void fixup_put_user_skip_1(void);
-extern void fixup_put_user_skip_2(void);
+/* Global fixups - defined as int to avoid creation of function pointers */
+extern int fixup_get_user_skip_1;
+extern int fixup_get_user_skip_2;
+extern int fixup_put_user_skip_1;
+extern int fixup_put_user_skip_2;
 EXPORT_SYMBOL(fixup_get_user_skip_1);
 EXPORT_SYMBOL(fixup_get_user_skip_2);
 EXPORT_SYMBOL(fixup_put_user_skip_1);

From ec353a589df2b1bea7e001266e97e4fd4347e1bf Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 8 Apr 2016 18:18:48 +0200
Subject: [PATCH 313/797] parisc: Fix kernel crash with reversed
 copy_from_user()

commit ef72f3110d8b19f4c098a0bff7ed7d11945e70c6 upstream.

The kernel module testcase (lib/test_user_copy.c) exhibited a kernel
crash on parisc if the parameters for copy_from_user were reversed
("illegal reversed copy_to_user" testcase).

Fix this potential crash by checking in the fault handler whether the
faulting address is in the exception table.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/kernel/traps.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 553b09855cfd..77e2262c97f6 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -798,6 +798,9 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 
 	    if (fault_space == 0 && !faulthandler_disabled())
 	    {
+		/* Clean up and return if in exception table. */
+		if (fixup_exception(regs))
+			return;
 		pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
 		parisc_terminate("Kernel Fault", regs, code, fault_address);
 	    }

From 4d6deebe06c5383dffe9cba859378ca708d4076d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 8 Apr 2016 18:32:52 +0200
Subject: [PATCH 314/797] parisc: Unbreak handling exceptions from kernel
 modules

commit 2ef4dfd9d9f288943e249b78365a69e3ea3ec072 upstream.

Handling exceptions from modules never worked on parisc.
It was just masked by the fact that exceptions from modules
don't happen during normal use.

When a module triggers an exception in get_user() we need to load the
main kernel dp value before accessing the exception_data structure, and
afterwards restore the original dp value of the module on exit.

Noticed-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/include/asm/uaccess.h | 1 +
 arch/parisc/kernel/asm-offsets.c  | 1 +
 arch/parisc/lib/fixup.S           | 6 ++++++
 arch/parisc/mm/fault.c            | 1 +
 4 files changed, 9 insertions(+)

diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 0abdd4c607ed..1960b87c1c8b 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -76,6 +76,7 @@ struct exception_table_entry {
  */
 struct exception_data {
 	unsigned long fault_ip;
+	unsigned long fault_gp;
 	unsigned long fault_space;
 	unsigned long fault_addr;
 };
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index d2f62570a7b1..78d30d2ea2d8 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -299,6 +299,7 @@ int main(void)
 #endif
 	BLANK();
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
+	DEFINE(EXCDATA_GP, offsetof(struct exception_data, fault_gp));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
 	DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
 	BLANK();
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
index 536ef66bb94b..1052b747e011 100644
--- a/arch/parisc/lib/fixup.S
+++ b/arch/parisc/lib/fixup.S
@@ -26,6 +26,7 @@
 
 #ifdef CONFIG_SMP
 	.macro  get_fault_ip t1 t2
+	loadgp
 	addil LT%__per_cpu_offset,%r27
 	LDREG RT%__per_cpu_offset(%r1),\t1
 	/* t2 = smp_processor_id() */
@@ -40,14 +41,19 @@
 	LDREG RT%exception_data(%r1),\t1
 	/* t1 = this_cpu_ptr(&exception_data) */
 	add,l \t1,\t2,\t1
+	/* %r27 = t1->fault_gp - restore gp */
+	LDREG EXCDATA_GP(\t1), %r27
 	/* t1 = t1->fault_ip */
 	LDREG EXCDATA_IP(\t1), \t1
 	.endm
 #else
 	.macro  get_fault_ip t1 t2
+	loadgp
 	/* t1 = this_cpu_ptr(&exception_data) */
 	addil LT%exception_data,%r27
 	LDREG RT%exception_data(%r1),\t2
+	/* %r27 = t2->fault_gp - restore gp */
+	LDREG EXCDATA_GP(\t2), %r27
 	/* t1 = t2->fault_ip */
 	LDREG EXCDATA_IP(\t2), \t1
 	.endm
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index a762864ec92e..f9064449908a 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -151,6 +151,7 @@ int fixup_exception(struct pt_regs *regs)
 		struct exception_data *d;
 		d = this_cpu_ptr(&exception_data);
 		d->fault_ip = regs->iaoq[0];
+		d->fault_gp = regs->gr[27];
 		d->fault_space = regs->isr;
 		d->fault_addr = regs->ior;
 

From 57f21bd260958fbfda2ef819a7fa8a0054df0c68 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 1 Apr 2016 12:28:16 +0200
Subject: [PATCH 315/797] ALSA: timer: Use mod_timer() for rearming the system
 timer

commit 4a07083ed613644c96c34a7dd2853dc5d7c70902 upstream.

ALSA system timer backend stops the timer via del_timer() without sync
and leaves del_timer_sync() at the close instead.  This is because of
the restriction by the design of ALSA timer: namely, the stop callback
may be called from the timer handler, and calling the sync shall lead
to a hangup.  However, this also triggers a kernel BUG() when the
timer is rearmed immediately after stopping without sync:
 kernel BUG at kernel/time/timer.c:966!
 Call Trace:
  <IRQ>
  [<ffffffff8239c94e>] snd_timer_s_start+0x13e/0x1a0
  [<ffffffff8239e1f4>] snd_timer_interrupt+0x504/0xec0
  [<ffffffff8122fca0>] ? debug_check_no_locks_freed+0x290/0x290
  [<ffffffff8239ec64>] snd_timer_s_function+0xb4/0x120
  [<ffffffff81296b72>] call_timer_fn+0x162/0x520
  [<ffffffff81296add>] ? call_timer_fn+0xcd/0x520
  [<ffffffff8239ebb0>] ? snd_timer_interrupt+0xec0/0xec0
  ....

It's the place where add_timer() checks the pending timer.  It's clear
that this may happen after the immediate restart without sync in our
cases.

So, the workaround here is just to use mod_timer() instead of
add_timer().  This looks like a band-aid fix, but it's a right move,
as snd_timer_interrupt() takes care of the continuous rearm of timer.

Reported-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index f24c9fccf008..b982d1b089bd 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1051,8 +1051,8 @@ static int snd_timer_s_start(struct snd_timer * timer)
 		njiff += timer->sticks - priv->correction;
 		priv->correction = 0;
 	}
-	priv->last_expires = priv->tlist.expires = njiff;
-	add_timer(&priv->tlist);
+	priv->last_expires = njiff;
+	mod_timer(&priv->tlist, njiff);
 	return 0;
 }
 

From 4568babcf7b22fcde8574892a349cb7759e45b0b Mon Sep 17 00:00:00 2001
From: Bobi Mihalca <bobbymihalca@touchtech.ro>
Date: Wed, 23 Mar 2016 13:23:55 +0200
Subject: [PATCH 316/797] ALSA: hda - Asus N750JV external subwoofer fixup

commit 70cf2cbd685e218c3ffd105d9fb6cf0f8d767481 upstream.

ASUS N750JV needs the same fixup as N550 for enabling its subwoofer.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=115181
Signed-off-by: Bobi Mihalca <bobbymihalca@touchtech.ro>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 6968b796baa3..1b460c1f8678 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6643,6 +6643,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
 	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
 	SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
+	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_BASS_1A),
 	SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
 	SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),

From 0d55f19ac372fa3cad40df3ccf8ce463012d6748 Mon Sep 17 00:00:00 2001
From: Bobi Mihalca <bobbymihalca@touchtech.ro>
Date: Wed, 23 Mar 2016 13:26:11 +0200
Subject: [PATCH 317/797] ALSA: hda - Fix white noise on Asus N750JV headphone

commit 9d4dc5840f93bcb002fa311693349deae7702bc5 upstream.

For reducing the noise from the headphone output on ASUS N750JV,
call the existing fixup, alc_fixup_auto_mute_via_amp(), additionally.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=115181
Signed-off-by: Bobi Mihalca <bobbymihalca@touchtech.ro>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1b460c1f8678..ba4e5b921b7c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6379,6 +6379,7 @@ enum {
 	ALC668_FIXUP_AUTO_MUTE,
 	ALC668_FIXUP_DELL_DISABLE_AAMIX,
 	ALC668_FIXUP_DELL_XPS13,
+	ALC662_FIXUP_ASUS_Nx50,
 };
 
 static const struct hda_fixup alc662_fixups[] = {
@@ -6619,6 +6620,12 @@ static const struct hda_fixup alc662_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc_fixup_bass_chmap,
 	},
+	[ALC662_FIXUP_ASUS_Nx50] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_auto_mute_via_amp,
+		.chained = true,
+		.chain_id = ALC662_FIXUP_BASS_1A
+	},
 };
 
 static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -6643,7 +6650,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
 	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
 	SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
-	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_BASS_1A),
+	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
 	SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),

From 3495017eaaee88cd0e85742b710fee3bbc2b3d08 Mon Sep 17 00:00:00 2001
From: Bobi Mihalca <bobbymihalca@touchtech.ro>
Date: Wed, 23 Mar 2016 13:32:33 +0200
Subject: [PATCH 318/797] ALSA: hda - Apply fix for white noise on Asus N550JV,
 too

commit 83a9efb5b8170b7cffef4f62656656e1d8ad2ccd upstream.

Apply the new fixup that is used for ASUS N750JV to another similar
model, N550JV, too, for reducing the headphone noise.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=115181
Signed-off-by: Bobi Mihalca <bobbymihalca@touchtech.ro>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ba4e5b921b7c..3671eb89dd28 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6648,7 +6648,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
-	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
+	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
 	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),

From fb4cfc6e0a465ccdeddd47567c42fbb197253aca Mon Sep 17 00:00:00 2001
From: Xishi Qiu <qiuxishi@huawei.com>
Date: Fri, 1 Apr 2016 14:31:20 -0700
Subject: [PATCH 319/797] mm: fix invalid node in alloc_migrate_target()

commit 6f25a14a7053b69917e2ebea0d31dd444cd31fd5 upstream.

It is incorrect to use next_node to find a target node: it can return
MAX_NUMNODES or an invalid node.  This will lead to a crash in the buddy
system allocation.

Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Laura Abbott" <lauraa@codeaurora.org>
Cc: Hui Zhu <zhuhui@xiaomi.com>
Cc: Wang Xiaoqiang <wangxq10@lzu.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_isolation.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 4568fd58f70a..00c96462cc36 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -283,11 +283,11 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
 	 * now as a simple work-around, we use the next node for destination.
 	 */
 	if (PageHuge(page)) {
-		nodemask_t src = nodemask_of_node(page_to_nid(page));
-		nodemask_t dst;
-		nodes_complement(dst, src);
+		int node = next_online_node(page_to_nid(page));
+		if (node == MAX_NUMNODES)
+			node = first_online_node;
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					    next_node(page_to_nid(page), dst));
+					    node);
 	}
 
 	if (PageHighMem(page))

From d38ae1c2be13f28629ae08de79f7d511ab79d1ee Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Tue, 8 Mar 2016 10:03:56 +0100
Subject: [PATCH 320/797] powerpc/mm: Fixup preempt underflow with huge pages

commit 08a5bb2921e490939f78f38fd0d02858bb709942 upstream.

hugepd_free() used __get_cpu_var() once. Nothing ensured that the code
accessing the variable did not migrate from one CPU to another and soon
this was noticed by Tiejun Chen in 94b09d755462 ("powerpc/hugetlb:
Replace __get_cpu_var with get_cpu_var"). So we had it fixed.

Christoph Lameter was doing his __get_cpu_var() replaces and forgot
PowerPC. Then he noticed this and sent his fixed up batch again which
got applied as 69111bac42f5 ("powerpc: Replace __get_cpu_var uses").

The careful reader will notice one little detail: get_cpu_var() got
replaced with this_cpu_ptr(). So now we have a put_cpu_var() which does
a preempt_enable() and nothing that does preempt_disable() so we
underflow the preempt counter.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/mm/hugetlbpage.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9833fee493ec..807f1594701d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -486,13 +486,13 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
 {
 	struct hugepd_freelist **batchp;
 
-	batchp = this_cpu_ptr(&hugepd_freelist_cur);
+	batchp = &get_cpu_var(hugepd_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpumask_equal(mm_cpumask(tlb->mm),
 			  cpumask_of(smp_processor_id()))) {
 		kmem_cache_free(hugepte_cache, hugepte);
-        put_cpu_var(hugepd_freelist_cur);
+		put_cpu_var(hugepd_freelist_cur);
 		return;
 	}
 

From fa07cf6613c9cfb744be453005db2268d1811e73 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 7 Apr 2016 19:58:44 -0700
Subject: [PATCH 321/797] libnvdimm: fix smart data retrieval

commit 211291126698c8f047617565b2e2e7f822f86354 upstream.

It appears that smart data retrieval has been broken since the
initial implementation.  Fix the payload size to be 128-bytes per the
specification.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvdimm/bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 496b9b662dc6..5f47356d6942 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -335,7 +335,7 @@ static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
 	[ND_CMD_IMPLEMENTED] = { },
 	[ND_CMD_SMART] = {
 		.out_num = 2,
-		.out_sizes = { 4, 8, },
+		.out_sizes = { 4, 128, },
 	},
 	[ND_CMD_SMART_THRESHOLD] = {
 		.out_num = 2,

From 966bf1bea449c9f0ef89051b2025c748d28baa2b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 7 Apr 2016 19:59:27 -0700
Subject: [PATCH 322/797] libnvdimm, pfn: fix uuid validation

commit e5670563f588ed1c0603819350c0f02cec23f5c5 upstream.

If we detect a namespace has a stale info block in the init path, we
should overwrite with the latest configuration.  In fact, we already
return -ENODEV when the parent uuid is invalid, the same should be done
for the 'self' uuid.  Otherwise we can get into a condition where
userspace is unable to reconfigure the pfn-device without directly /
manually invalidating the info block.

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvdimm/pfn_devs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 71805a1aa0f3..9d3974591cd6 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -275,7 +275,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 	} else {
 		/* from init we validate */
 		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
-			return -EINVAL;
+			return -ENODEV;
 	}
 
 	/*

From d4429b81f68bc696535c455f2fd6e86d2b99dd4f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 31 Mar 2016 09:38:51 +0200
Subject: [PATCH 323/797] compiler-gcc: disable -ftracer for __noclone
 functions

commit 95272c29378ee7dc15f43fa2758cb28a5913a06d upstream.

-ftracer can duplicate asm blocks causing compilation to fail in
noclone functions.  For example, KVM declares a global variable
in an asm like

    asm("2: ... \n
         .pushsection data \n
         .global vmx_return \n
         vmx_return: .long 2b");

and -ftracer causes a double declaration.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: kvm@vger.kernel.org
Reported-by: Linda Walsh <lkml@tlinx.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 22ab246feed3..eeae401a2412 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -199,7 +199,7 @@
 #define unreachable() __builtin_unreachable()
 
 /* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__))
+#define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
 
 #endif /* GCC_VERSION >= 40500 */
 

From 6fcee661a2edbf40b6154aaad4166776a9bad294 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 24 Mar 2016 16:54:34 +0000
Subject: [PATCH 324/797] arm64: opcodes.h: Add arm big-endian config options
 before including arm header

commit a6002ec5a8c68e69706b2efd6db6d682d0ab672c upstream.

arm and arm64 use different config options to specify big endian. This
needs taking into account when including code/headers between the two
architectures.

A case in point is PAN, which uses the __instr_arm() macro to output
instructions. The macro comes from opcodes.h, which lives under arch/arm.
On a big-endian build the mismatched config options mean the instruction
isn't byte swapped correctly, resulting in undefined instruction exceptions
during boot:

| alternatives: patching kernel code
| kdevtmpfs[87]: undefined instruction: pc=ffffffc0004505b4
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c
| Internal error: Oops - undefined instruction: 0 [#1] SMP
| Modules linked in:
| CPU: 0 PID: 87 Comm: kdevtmpfs Not tainted 4.1.16+ #5
| Hardware name: Hisilicon PhosphorHi1382 EVB (DT)
| task: ffffffc336591700 ti: ffffffc3365a4000 task.ti: ffffffc3365a4000
| PC is at dump_instr+0x68/0x100
| LR is at do_undefinstr+0x1d4/0x2a4
| pc : [<ffffffc00076231c>] lr : [<ffffffc0000811d4>] pstate: 604001c5
| sp : ffffffc3365a6450

Reported-by: Hanjun Guo <guohanjun@huawei.com>
Tested-by: Xuefeng Wang <wxf.wang@hisilicon.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/opcodes.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
index 4e603ea36ad3..123f45d92cd1 100644
--- a/arch/arm64/include/asm/opcodes.h
+++ b/arch/arm64/include/asm/opcodes.h
@@ -1 +1,5 @@
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN
+#endif
+
 #include <../../arm/include/asm/opcodes.h>

From 7ed78a4894600f29f88f85d22d411c8116d27a8c Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Thu, 25 Feb 2016 16:15:05 -0500
Subject: [PATCH 325/797] drm/dp: move hw_mutex up the call stack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7779c5e23c5132c22a219f1f5554ef81dd15ee91 upstream.

1) don't let other threads trying to bang on aux channel interrupt the
defer timeout/logic
2) don't let other threads interrupt the i2c over aux logic

Technically, according to people who actually have the DP spec, this
should not be required.  In practice, it makes some troublesome Dell
monitor (and perhaps others) work, so probably a case of "It's compliant
if it works with windows" on the hw vendor's part..

v2: rebased to come before DPCD/AUX logging patch for easier backport
to stable branches.

Reported-by: Dave Wysochanski <dwysocha@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1274157
Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_helper.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 9535c5b60387..7e5a97204051 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -178,7 +178,7 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 {
 	struct drm_dp_aux_msg msg;
 	unsigned int retry;
-	int err;
+	int err = 0;
 
 	memset(&msg, 0, sizeof(msg));
 	msg.address = offset;
@@ -186,6 +186,8 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	msg.buffer = buffer;
 	msg.size = size;
 
+	mutex_lock(&aux->hw_mutex);
+
 	/*
 	 * The specification doesn't give any recommendation on how often to
 	 * retry native transactions. We used to retry 7 times like for
@@ -194,25 +196,24 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	 */
 	for (retry = 0; retry < 32; retry++) {
 
-		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, &msg);
-		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
 
-			return err;
+			goto unlock;
 		}
 
 
 		switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) {
 		case DP_AUX_NATIVE_REPLY_ACK:
 			if (err < size)
-				return -EPROTO;
-			return err;
+				err = -EPROTO;
+			goto unlock;
 
 		case DP_AUX_NATIVE_REPLY_NACK:
-			return -EIO;
+			err = -EIO;
+			goto unlock;
 
 		case DP_AUX_NATIVE_REPLY_DEFER:
 			usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
@@ -221,7 +222,11 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	}
 
 	DRM_DEBUG_KMS("too many retries, giving up\n");
-	return -EIO;
+	err = -EIO;
+
+unlock:
+	mutex_unlock(&aux->hw_mutex);
+	return err;
 }
 
 /**
@@ -543,9 +548,7 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 	int max_retries = max(7, drm_dp_i2c_retry_count(msg, dp_aux_i2c_speed_khz));
 
 	for (retry = 0, defer_i2c = 0; retry < (max_retries + defer_i2c); retry++) {
-		mutex_lock(&aux->hw_mutex);
 		ret = aux->transfer(aux, msg);
-		mutex_unlock(&aux->hw_mutex);
 		if (ret < 0) {
 			if (ret == -EBUSY)
 				continue;
@@ -684,6 +687,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
 
 	memset(&msg, 0, sizeof(msg));
 
+	mutex_lock(&aux->hw_mutex);
+
 	for (i = 0; i < num; i++) {
 		msg.address = msgs[i].addr;
 		drm_dp_i2c_msg_set_request(&msg, &msgs[i]);
@@ -738,6 +743,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
 	msg.size = 0;
 	(void)drm_dp_i2c_do_msg(aux, &msg);
 
+	mutex_unlock(&aux->hw_mutex);
+
 	return err;
 }
 

From dd4fea89f865b0e36cb92c35377d97a82d620e4b Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 30 Mar 2016 11:40:43 +0200
Subject: [PATCH 326/797] drm/udl: Use unlocked gem unreferencing

commit 72b9ff0612ad8fc969b910cd00ac16b57a1a9ba4 upstream.

For drm_gem_object_unreference callers are required to hold
dev->struct_mutex, which these paths don't. Enforcing this requirement
has become a bit more strict with

commit ef4c6270bf2867e2f8032e9614d1a8cfc6c71663
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Thu Oct 15 09:36:25 2015 +0200

    drm/gem: Check locking in drm_gem_object_unreference

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/udl/udl_fb.c  | 2 +-
 drivers/gpu/drm/udl/udl_gem.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 62c7b1dafaa4..73e41a8613da 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -539,7 +539,7 @@ static int udlfb_create(struct drm_fb_helper *helper,
 out_destroy_fbi:
 	drm_fb_helper_release_fbi(helper);
 out_gfree:
-	drm_gem_object_unreference(&ufbdev->ufb.obj->base);
+	drm_gem_object_unreference_unlocked(&ufbdev->ufb.obj->base);
 out:
 	return ret;
 }
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index 2a0a784ab6ee..d7528e0d8442 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -52,7 +52,7 @@ udl_gem_create(struct drm_file *file,
 		return ret;
 	}
 
-	drm_gem_object_unreference(&obj->base);
+	drm_gem_object_unreference_unlocked(&obj->base);
 	*handle_p = handle;
 	return 0;
 }

From 1456f5cf1565ede6a0adcb43f27d66eede96876f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 25 Mar 2016 10:31:04 -0400
Subject: [PATCH 327/797] drm/radeon: add a dpm quirk for sapphire Dual-X R7
 370 2G D5

commit f971f2263deaa4a441e377b385c11aee0f3b3f9a upstream.

bug:
https://bugs.freedesktop.org/show_bug.cgi?id=94692

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/si_dpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index a82b891ae1fe..7d7500413238 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2926,6 +2926,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
 	/* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },

From ddf58bfd05fc6b71858228962dffcaa91d1b4d53 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 28 Mar 2016 10:16:40 -0400
Subject: [PATCH 328/797] drm/radeon: add another R7 370 quirk

commit a64663d9870364bd2a2df62bf0d3a9fbe5ea62a8 upstream.

bug:
https://bugzilla.kernel.org/show_bug.cgi?id=115291

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/si_dpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 7d7500413238..444935dec7af 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2930,6 +2930,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 },
 	{ 0, 0, 0, 0 },
 };
 

From 0bccb7a91e02086d7fcdd61042d508e012ba87ef Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 28 Mar 2016 10:21:20 -0400
Subject: [PATCH 329/797] drm/radeon: add a dpm quirk for all R7 370 parts

commit 0e5585dc870af947fab2af96a88c2d8b4270247c upstream.

Higher mclk values are not stable due to a bug somewhere.
Limit them for now.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/si_dpm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 444935dec7af..7285adb27099 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3010,6 +3010,10 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		}
 		++p;
 	}
+	/* limit mclk on all R7 370 parts for stability */
+	if (rdev->pdev->device == 0x6811 &&
+	    rdev->pdev->revision == 0x81)
+		max_mclk = 120000;
 
 	if (rps->vce_active) {
 		rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;

From 1e84f8b8dad8c270006b89385b4b60e1cc1ccf4c Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 31 Mar 2016 16:07:38 -0400
Subject: [PATCH 330/797] drm/amdgpu/gmc: move vram type fetching into sw_init

commit d1518a1db31a25682ea09c4b135fa72d9883be42 upstream.

early_init gets called before atom asic init so on non-posted
cards, the vram type is not initialized.

Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++++++++--------
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 272110cc18c2..ea87033bfaf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -898,14 +898,6 @@ static int gmc_v7_0_early_init(void *handle)
 	gmc_v7_0_set_gart_funcs(adev);
 	gmc_v7_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -926,6 +918,14 @@ static int gmc_v7_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ba4ad00ba8b4..f035b5b99cb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -852,14 +852,6 @@ static int gmc_v8_0_early_init(void *handle)
 	gmc_v8_0_set_gart_funcs(adev);
 	gmc_v8_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -880,6 +872,14 @@ static int gmc_v8_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;

From 40a8f74321fb892dec5757ca44ec3850bde26a24 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 31 Mar 2016 16:41:32 -0400
Subject: [PATCH 331/797] drm/amdgpu/gmc: use proper register for vram type on
 Fiji

commit b634de4f446c062a0c95ec4d150b4cf7c85e3526 upstream.

The offset changed on Fiji.

Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index f035b5b99cb5..08423089fb84 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -862,6 +862,8 @@ static int gmc_v8_0_late_init(void *handle)
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 }
 
+#define mmMC_SEQ_MISC0_FIJI 0xA71
+
 static int gmc_v8_0_sw_init(void *handle)
 {
 	int r;
@@ -875,7 +877,12 @@ static int gmc_v8_0_sw_init(void *handle)
 	if (adev->flags & AMD_IS_APU) {
 		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
+		u32 tmp;
+
+		if (adev->asic_type == CHIP_FIJI)
+			tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
+		else
+			tmp = RREG32(mmMC_SEQ_MISC0);
 		tmp &= MC_SEQ_MISC0__MT__MASK;
 		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
 	}

From 0cc60c58ba3fcbc8467a3d56b98c664b0fc7d9bb Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Fri, 18 Mar 2016 10:11:07 -0400
Subject: [PATCH 332/797] xen/events: Mask a moving irq

commit ff1e22e7a638a0782f54f81a6c9cb139aca2da35 upstream.

Moving an unmasked irq may result in irq handler being invoked on both
source and target CPUs.

With 2-level this can happen as follows:

On source CPU:
        evtchn_2l_handle_events() ->
            generic_handle_irq() ->
                handle_edge_irq() ->
                   eoi_pirq():
                       irq_move_irq(data);

                       /***** WE ARE HERE *****/

                       if (VALID_EVTCHN(evtchn))
                           clear_evtchn(evtchn);

If at this moment target processor is handling an unrelated event in
evtchn_2l_handle_events()'s loop it may pick up our event since target's
cpu_evtchn_mask claims that this event belongs to it *and* the event is
unmasked and still pending. At the same time, source CPU will continue
executing its own handle_edge_irq().

With FIFO interrupt the scenario is similar: irq_move_irq() may result
in a EVTCHNOP_unmask hypercall which, in turn, may make the event
pending on the target CPU.

We can avoid this situation by moving and clearing the event while
keeping event masked.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/events/events_base.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 524c22146429..44367783f07a 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -484,9 +484,19 @@ static void eoi_pirq(struct irq_data *data)
 	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
 	int rc = 0;
 
-	irq_move_irq(data);
+	if (!VALID_EVTCHN(evtchn))
+		return;
 
-	if (VALID_EVTCHN(evtchn))
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		int masked = test_and_set_mask(evtchn);
+
+		clear_evtchn(evtchn);
+
+		irq_move_masked_irq(data);
+
+		if (!masked)
+			unmask_evtchn(evtchn);
+	} else
 		clear_evtchn(evtchn);
 
 	if (pirq_needs_eoi(data->irq)) {
@@ -1357,9 +1367,19 @@ static void ack_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
 
-	irq_move_irq(data);
+	if (!VALID_EVTCHN(evtchn))
+		return;
 
-	if (VALID_EVTCHN(evtchn))
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		int masked = test_and_set_mask(evtchn);
+
+		clear_evtchn(evtchn);
+
+		irq_move_masked_irq(data);
+
+		if (!masked)
+			unmask_evtchn(evtchn);
+	} else
 		clear_evtchn(evtchn);
 }
 

From d5322b91e61647630a91cacd7e22736c35dd98bd Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Sun, 21 Feb 2016 10:12:39 +0300
Subject: [PATCH 333/797] tcp: convert cached rtt from usec to jiffies when
 feeding initial rto

[ Upstream commit 9bdfb3b79e61c60e1a3e2dc05ad164528afa6b8a ]

Currently it's converted into msecs, so only HZ=1000 is unaffected.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 740b0f1841f6 ("tcp: switch rtt estimations to usec resolution")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_metrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b4b792..a726d7853ce5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -550,7 +550,7 @@ void tcp_init_metrics(struct sock *sk)
 	 */
 	if (crtt > tp->srtt_us) {
 		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
-		crtt /= 8 * USEC_PER_MSEC;
+		crtt /= 8 * USEC_PER_SEC / HZ;
 		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
 	} else if (tp->srtt_us == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from

From 207485dc4f22f8da29d2ba5e9e41c4c9e84d4e01 Mon Sep 17 00:00:00 2001
From: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Date: Mon, 22 Feb 2016 12:58:05 +1300
Subject: [PATCH 334/797] tunnel: Clear IPCB(skb)->opt before dst_link_failure
 called

[ Upstream commit 5146d1f151122e868e594c7b45115d64825aee5f ]

IPCB may contain data from previous layers (in the observed case the
qdisc layer). In the observed scenario, the data was misinterpreted as
ip header options, which later caused the ihl to be set to an invalid
value (<5). This resulted in an infinite loop in the mips implementation
of ip_fast_csum.

This patch clears IPCB(skb)->opt before dst_link_failure can be called for
various types of tunnels. This change only applies to encapsulated ipv4
packets.

The code introduced in 11c21a30 which clears all of IPCB has been removed
to be consistent with these changes, and instead the opt field is cleared
unconditionally in ip_tunnel_xmit. The change in ip_tunnel_xmit applies to
SIT, GRE, and IPIP tunnels.

The relevant vti, l2tp, and pptp functions already contain similar code for
clearing the IPCB.

Signed-off-by: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_tunnel.c  | 3 ++-
 net/ipv4/udp_tunnel.c | 2 ++
 net/ipv6/ip6_gre.c    | 2 ++
 net/ipv6/ip6_tunnel.c | 2 ++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index cbb51f3fac06..ce30c8b72457 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -663,6 +663,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 	connected = (tunnel->parms.iph.daddr != 0);
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	dst = tnl_params->daddr;
 	if (dst == 0) {
 		/* NBMA tunnel */
@@ -760,7 +762,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 			tunnel->err_count--;
 
-			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 			dst_link_failure(skb);
 		} else
 			tunnel->err_count = 0;
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index aba428626b52..280a9bdeddee 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -89,6 +89,8 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
 	uh->source = src_port;
 	uh->len = htons(skb->len);
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	udp_set_csum(nocheck, skb, src, dst, skb->len);
 
 	return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP,
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e5ea177d34c6..4650c6824783 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -778,6 +778,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 	__u32 mtu;
 	int err;
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		encap_limit = t->parms.encap_limit;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 137fca42aaa6..6c5dfec7a377 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1180,6 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	u8 tproto;
 	int err;
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	tproto = ACCESS_ONCE(t->parms.proto);
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;

From e948c9adee31b56f90efd3eb240221fe257f5aba Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 22 Feb 2016 18:43:25 +0100
Subject: [PATCH 335/797] ipv4: only create late gso-skb if skb is already set
 up with CHECKSUM_PARTIAL

[ Upstream commit a8c4a2522a0808c5c2143612909717d1115c40cf ]

Otherwise we break the contract with GSO to only pass CHECKSUM_PARTIAL
skbs down. This can easily happen with UDP+IPv4 sockets with the first
MSG_MORE write smaller than the MTU, second write is a sendfile.

Returning -EOPNOTSUPP lets the callers fall back into the normal sendmsg
path, where we calculate the checksum manually during copying.

Commit d749c9cbffd6 ("ipv4: no CHECKSUM_PARTIAL on MSG_MORE corked
sockets") started to expose this bug.

Fixes: d749c9cbffd6 ("ipv4: no CHECKSUM_PARTIAL on MSG_MORE corked sockets")
Reported-by: Jiri Benc <jbenc@redhat.com>
Cc: Jiri Benc <jbenc@redhat.com>
Reported-by: Wakko Warner <wakko@animx.eu.org>
Cc: Wakko Warner <wakko@animx.eu.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_output.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 49f02856304d..f2ad5216c438 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1237,13 +1237,16 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 	if (!skb)
 		return -EINVAL;
 
-	cork->length += size;
 	if ((size + skb->len > mtu) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
+			return -EOPNOTSUPP;
+
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 	}
+	cork->length += size;
 
 	while (size > 0) {
 		if (skb_is_gso(skb)) {

From 05b0ca3ec3a2e49f29bfc2879c87699eb5a90ee4 Mon Sep 17 00:00:00 2001
From: Diego Viola <diego.viola@gmail.com>
Date: Tue, 23 Feb 2016 12:04:04 -0300
Subject: [PATCH 336/797] net: jme: fix suspend/resume on JMC260

[ Upstream commit ee50c130c82175eaa0820c96b6d3763928af2241 ]

The JMC260 network card fails to suspend/resume because the call to
jme_start_irq() was too early, moving the call to jme_start_irq() after
the call to jme_reset_link() makes it work.

Prior to this change, suspend/resume would fail unless
/sys/power/pm_async=0 was explicitly specified.

Relevant bug report: https://bugzilla.kernel.org/show_bug.cgi?id=112351

Signed-off-by: Diego Viola <diego.viola@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/jme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 060dd3922974..973dade2d07f 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -3312,13 +3312,14 @@ jme_resume(struct device *dev)
 		jme_reset_phy_processor(jme);
 	jme_phy_calibration(jme);
 	jme_phy_setEA(jme);
-	jme_start_irq(jme);
 	netif_device_attach(netdev);
 
 	atomic_inc(&jme->link_changing);
 
 	jme_reset_link(jme);
 
+	jme_start_irq(jme);
+
 	return 0;
 }
 

From ea0519a2bc8d7e6244cc5201f7b92b896be60c07 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 23 Feb 2016 10:10:26 -0800
Subject: [PATCH 337/797] net: vrf: Remove direct access to skb->data

[ Upstream commit 65c38aa653c14df49e19faad74bd375f36e61c57 ]

Nik pointed out that the VRF driver should be using skb_header_pointer
instead of accessing skb->data and bits beyond directly which can
be garbage.

Fixes: 35402e313663 ("net: Add IPv6 support to VRF device")
Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/vrf.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 0a242b200df4..903bda437839 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -114,20 +114,23 @@ static struct dst_ops vrf_dst_ops = {
 #if IS_ENABLED(CONFIG_IPV6)
 static bool check_ipv6_frame(const struct sk_buff *skb)
 {
-	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
-	size_t hlen = sizeof(*ipv6h);
+	const struct ipv6hdr *ipv6h;
+	struct ipv6hdr _ipv6h;
 	bool rc = true;
 
-	if (skb->len < hlen)
+	ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
+	if (!ipv6h)
 		goto out;
 
 	if (ipv6h->nexthdr == NEXTHDR_ICMP) {
 		const struct icmp6hdr *icmph;
+		struct icmp6hdr _icmph;
 
-		if (skb->len < hlen + sizeof(*icmph))
+		icmph = skb_header_pointer(skb, sizeof(_ipv6h),
+					   sizeof(_icmph), &_icmph);
+		if (!icmph)
 			goto out;
 
-		icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
 		switch (icmph->icmp6_type) {
 		case NDISC_ROUTER_SOLICITATION:
 		case NDISC_ROUTER_ADVERTISEMENT:

From aad983b70b30a90d0e5fba4d8929d0db1191dd3f Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Tue, 23 Feb 2016 19:23:23 +0000
Subject: [PATCH 338/797] net: qca_spi: Don't clear IFF_BROADCAST

[ Upstream commit 2b70bad23c89b121a3e4a00f8968d14ebb78887d ]

Currently qcaspi_netdev_setup accidentally clears IFF_BROADCAST.
So fix this by keeping the flags from ether_setup.

Reported-by: Michael Heimpold <michael.heimpold@i2se.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: 291ab06ecf67 (net: qualcomm: new Ethernet over SPI driver for QCA7000)
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 689a4a5c8dcf..f2ee3e5fb167 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -811,7 +811,6 @@ qcaspi_netdev_setup(struct net_device *dev)
 	dev->netdev_ops = &qcaspi_netdev_ops;
 	qcaspi_set_ethtool_ops(dev);
 	dev->watchdog_timeo = QCASPI_TX_TIMEOUT;
-	dev->flags = IFF_MULTICAST;
 	dev->tx_queue_len = 100;
 
 	qca = netdev_priv(dev);

From 2f0bd0e69e5b0a4ac00fd69f6591301b43fdeaae Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Tue, 23 Feb 2016 19:23:24 +0000
Subject: [PATCH 339/797] net: qca_spi: clear IFF_TX_SKB_SHARING

[ Upstream commit a4690afeb0d2d7ba4d60dfa98a89f3bb1ce60ecd ]

ether_setup sets IFF_TX_SKB_SHARING but this is not supported by
qca_spi as it modifies the skb on xmit.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: 291ab06ecf67 (net: qualcomm: new Ethernet over SPI driver for QCA7000)
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index f2ee3e5fb167..1ef03939d25f 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -811,6 +811,7 @@ qcaspi_netdev_setup(struct net_device *dev)
 	dev->netdev_ops = &qcaspi_netdev_ops;
 	qcaspi_set_ethtool_ops(dev);
 	dev->watchdog_timeo = QCASPI_TX_TIMEOUT;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->tx_queue_len = 100;
 
 	qca = netdev_priv(dev);

From 44bc7d1b9777128656310c0c7b47cb952a7c7b2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Wed, 24 Feb 2016 04:21:42 +0100
Subject: [PATCH 340/797] net: fix bridge multicast packet checksum validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 9b368814b336b0a1a479135eb2815edbc00efd3c ]

We need to update the skb->csum after pulling the skb, otherwise
an unnecessary checksum (re)computation can occur for IGMP/MLD packets
in the bridge code. Additionally this fixes the following splats for
network devices / bridge ports with support for and enabled RX checksum
offloading:

[...]
[   43.986968] eth0: hw csum failure
[   43.990344] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.4.0 #2
[   43.996193] Hardware name: BCM2709
[   43.999647] [<800204e0>] (unwind_backtrace) from [<8001cf14>] (show_stack+0x10/0x14)
[   44.007432] [<8001cf14>] (show_stack) from [<801ab614>] (dump_stack+0x80/0x90)
[   44.014695] [<801ab614>] (dump_stack) from [<802e4548>] (__skb_checksum_complete+0x6c/0xac)
[   44.023090] [<802e4548>] (__skb_checksum_complete) from [<803a055c>] (ipv6_mc_validate_checksum+0x104/0x178)
[   44.032959] [<803a055c>] (ipv6_mc_validate_checksum) from [<802e111c>] (skb_checksum_trimmed+0x130/0x188)
[   44.042565] [<802e111c>] (skb_checksum_trimmed) from [<803a06e8>] (ipv6_mc_check_mld+0x118/0x338)
[   44.051501] [<803a06e8>] (ipv6_mc_check_mld) from [<803b2c98>] (br_multicast_rcv+0x5dc/0xd00)
[   44.060077] [<803b2c98>] (br_multicast_rcv) from [<803aa510>] (br_handle_frame_finish+0xac/0x51c)
[...]

Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code")
Reported-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/skbuff.h | 17 +++++++++++++++++
 net/core/skbuff.c      | 22 ++++++++++++++++++++--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 75f136a22a5e..d84c593012ed 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2724,6 +2724,23 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,
 
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
 
+static inline void skb_postpush_rcsum(struct sk_buff *skb,
+				      const void *start, unsigned int len)
+{
+	/* For performing the reverse operation to skb_postpull_rcsum(),
+	 * we can instead of ...
+	 *
+	 *   skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
+	 *
+	 * ... just use this equivalent version here to save a few
+	 * instructions. Feeding csum of 0 in csum_partial() and later
+	 * on adding skb->csum is equivalent to feeding skb->csum in the
+	 * first place.
+	 */
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_partial(start, len, skb->csum);
+}
+
 /**
  *	pskb_trim_rcsum - trim received skb and update checksum
  *	@skb: buffer to trim
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5bf88f58bee7..8616d1147c93 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2947,6 +2947,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 }
 EXPORT_SYMBOL_GPL(skb_append_pagefrags);
 
+/**
+ *	skb_push_rcsum - push skb and update receive checksum
+ *	@skb: buffer to update
+ *	@len: length of data pushed
+ *
+ *	This function performs an skb_push on the packet and updates
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_push unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
+{
+	skb_push(skb, len);
+	skb_postpush_rcsum(skb, skb->data, len);
+	return skb->data;
+}
+
 /**
  *	skb_pull_rcsum - pull skb and update receive checksum
  *	@skb: buffer to update
@@ -4084,9 +4102,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
 	if (!pskb_may_pull(skb_chk, offset))
 		goto err;
 
-	__skb_pull(skb_chk, offset);
+	skb_pull_rcsum(skb_chk, offset);
 	ret = skb_chkf(skb_chk);
-	__skb_push(skb_chk, offset);
+	skb_push_rcsum(skb_chk, offset);
 
 	if (ret)
 		goto err;

From a87c65252bb82cc82ed226f70922aa83709ea2fe Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 28 Feb 2016 10:03:51 +0800
Subject: [PATCH 341/797] sctp: lack the check for ports in sctp_v6_cmp_addr

[ Upstream commit 40b4f0fd74e46c017814618d67ec9127ff20f157 ]

As the .cmp_addr member of sctp_af_inet6, sctp_v6_cmp_addr should also check
the port of the addresses, just like sctp_v4_cmp_addr, because it's invoked by
sctp_cmp_addr_exact().

Now sctp_v6_cmp_addr only checks the port when the two addresses have different
families, and lacks the port check for two ipv6 addresses. That prevents
sctp_hash_cmp() from working correctly.

So fix it by adding a port comparison in sctp_v6_cmp_addr().

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/ipv6.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ec529121f38a..ce46f1c7f133 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
 		}
 		return 0;
 	}
+	if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+		return 0;
 	if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
 		return 0;
 	/* If this is a linklocal address, compare the scope_id. */

From d9bbdcd83d63010fab254d5ed39116f9f58f1228 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Mon, 29 Feb 2016 15:03:33 -0800
Subject: [PATCH 342/797] mld, igmp: Fix reserved tailroom calculation

[ Upstream commit 1837b2e2bcd23137766555a63867e649c0b637f0 ]

The current reserved_tailroom calculation fails to take hlen and tlen into
account.

skb:
[__hlen__|__data____________|__tlen___|__extra__]
^                                               ^
head                                            skb_end_offset

In this representation, hlen + data + tlen is the size passed to alloc_skb.
"extra" is the extra space made available in __alloc_skb because of
rounding up by kmalloc. We can reorder the representation like so:

[__hlen__|__data____________|__extra__|__tlen___]
^                                               ^
head                                            skb_end_offset

The maximum space available for ip headers and payload without
fragmentation is min(mtu, data + extra). Therefore,
reserved_tailroom
= data + extra + tlen - min(mtu, data + extra)
= skb_end_offset - hlen - min(mtu, skb_end_offset - hlen - tlen)
= skb_tailroom - min(mtu, skb_tailroom - tlen) ; after skb_reserve(hlen)

Compare the second line to the current expression:
reserved_tailroom = skb_end_offset - min(mtu, skb_end_offset)
and we can see that hlen and tlen are not taken into account.

The min() in the third line can be expanded into:
if mtu < skb_tailroom - tlen:
	reserved_tailroom = skb_tailroom - mtu
else:
	reserved_tailroom = tlen

Depending on hlen, tlen, mtu and the number of multicast address records,
the current code may output skbs that have less tailroom than
dev->needed_tailroom or it may output more skbs than needed because not all
space available is used.

Fixes: 4c672e4b ("ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/skbuff.h | 24 ++++++++++++++++++++++++
 net/ipv4/igmp.c        |  3 +--
 net/ipv6/mcast.c       |  3 +--
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d84c593012ed..4fde61804191 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1908,6 +1908,30 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+/**
+ *	skb_tailroom_reserve - adjust reserved_tailroom
+ *	@skb: buffer to alter
+ *	@mtu: maximum amount of headlen permitted
+ *	@needed_tailroom: minimum amount of reserved_tailroom
+ *
+ *	Set reserved_tailroom so that headlen can be as large as possible but
+ *	not larger than mtu and tailroom cannot be smaller than
+ *	needed_tailroom.
+ *	The required headroom should already have been reserved before using
+ *	this function.
+ */
+static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
+					unsigned int needed_tailroom)
+{
+	SKB_LINEAR_ASSERT(skb);
+	if (mtu < skb_tailroom(skb) - needed_tailroom)
+		/* use at most mtu */
+		skb->reserved_tailroom = skb_tailroom(skb) - mtu;
+	else
+		/* use up to all available space */
+		skb->reserved_tailroom = needed_tailroom;
+}
+
 #define ENCAP_TYPE_ETHER	0
 #define ENCAP_TYPE_IPPROTO	1
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba14162..b3086cf27027 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -356,9 +356,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
-	skb->reserved_tailroom = skb_end_offset(skb) -
-				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
+	skb_tailroom_reserve(skb, mtu, tlen);
 
 	skb_reset_network_header(skb);
 	pip = ip_hdr(skb);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ee56d0a8699..d64ee7e83664 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
 		return NULL;
 
 	skb->priority = TC_PRIO_CONTROL;
-	skb->reserved_tailroom = skb_end_offset(skb) -
-				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
+	skb_tailroom_reserve(skb, mtu, tlen);
 
 	if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:

From 7da899cee1f19bdaf2649e5e8c36c0e65ace5a5c Mon Sep 17 00:00:00 2001
From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Tue, 1 Mar 2016 11:07:09 +0100
Subject: [PATCH 343/797] tipc: Revert "tipc: use existing sk_write_queue for
 outgoing packet chain"

[ Upstream commit f214fc402967e1bc94ad7f39faa03db5813d6849 ]

reverts commit 94153e36e709e ("tipc: use existing sk_write_queue for
outgoing packet chain")

In Commit 94153e36e709e, we assume that we fill & empty the socket's
sk_write_queue within the same lock_sock() session.

This is not true if the link is congested. During congestion, the
socket lock is released while we wait for the congestion to cease.
This implementation causes a nullptr exception, if the user space
program has several threads accessing the same socket descriptor.

Consider two threads of the same program performing the following:
     Thread1                                  Thread2
--------------------                    ----------------------
Enter tipc_sendmsg()                    Enter tipc_sendmsg()
lock_sock()                             lock_sock()
Enter tipc_link_xmit(), ret=ELINKCONG   spin on socket lock..
sk_wait_event()                             :
release_sock()                          grab socket lock
    :                                   Enter tipc_link_xmit(), ret=0
    :                                   release_sock()
Wakeup after congestion
lock_sock()
skb = skb_peek(pktchain);
!! TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;

In this case, the second thread transmits the buffers belonging to
both thread1 and thread2 successfully. When the first thread wakes up
after the congestion, it assumes that the pktchain is intact and
operates on the skbs in it, which leads to the following exception:

[2102.439969] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
[2102.440074] IP: [<ffffffffa005f330>] __tipc_link_xmit+0x2b0/0x4d0 [tipc]
[2102.440074] PGD 3fa3f067 PUD 3fa6b067 PMD 0
[2102.440074] Oops: 0000 [#1] SMP
[2102.440074] CPU: 2 PID: 244 Comm: sender Not tainted 3.12.28 #1
[2102.440074] RIP: 0010:[<ffffffffa005f330>]  [<ffffffffa005f330>] __tipc_link_xmit+0x2b0/0x4d0 [tipc]
[...]
[2102.440074] Call Trace:
[2102.440074]  [<ffffffff8163f0b9>] ? schedule+0x29/0x70
[2102.440074]  [<ffffffffa006a756>] ? tipc_node_unlock+0x46/0x170 [tipc]
[2102.440074]  [<ffffffffa005f761>] tipc_link_xmit+0x51/0xf0 [tipc]
[2102.440074]  [<ffffffffa006d8ae>] tipc_send_stream+0x11e/0x4f0 [tipc]
[2102.440074]  [<ffffffff8106b150>] ? __wake_up_sync+0x20/0x20
[2102.440074]  [<ffffffffa006dc9c>] tipc_send_packet+0x1c/0x20 [tipc]
[2102.440074]  [<ffffffff81502478>] sock_sendmsg+0xa8/0xd0
[2102.440074]  [<ffffffff81507895>] ? release_sock+0x145/0x170
[2102.440074]  [<ffffffff815030d8>] ___sys_sendmsg+0x3d8/0x3e0
[2102.440074]  [<ffffffff816426ae>] ? _raw_spin_unlock+0xe/0x10
[2102.440074]  [<ffffffff81115c2a>] ? handle_mm_fault+0x6ca/0x9d0
[2102.440074]  [<ffffffff8107dd65>] ? set_next_entity+0x85/0xa0
[2102.440074]  [<ffffffff816426de>] ? _raw_spin_unlock_irq+0xe/0x20
[2102.440074]  [<ffffffff8107463c>] ? finish_task_switch+0x5c/0xc0
[2102.440074]  [<ffffffff8163ea8c>] ? __schedule+0x34c/0x950
[2102.440074]  [<ffffffff81504e12>] __sys_sendmsg+0x42/0x80
[2102.440074]  [<ffffffff81504e62>] SyS_sendmsg+0x12/0x20
[2102.440074]  [<ffffffff8164aed2>] system_call_fastpath+0x16/0x1b

In this commit, we maintain the skb list always in the stack.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/tipc/socket.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b53246fb0412..e53003cf7703 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -673,7 +673,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct iov_iter save = msg->msg_iter;
 	uint mtu;
 	int rc;
@@ -687,14 +687,16 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 	msg_set_nameupper(mhdr, seq->upper);
 	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
 
+	skb_queue_head_init(&pktchain);
+
 new_mtu:
 	mtu = tipc_bcast_get_mtu(net);
-	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
 
 	do {
-		rc = tipc_bcast_xmit(net, pktchain);
+		rc = tipc_bcast_xmit(net, &pktchain);
 		if (likely(!rc))
 			return dsz;
 
@@ -704,7 +706,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			msg->msg_iter = save;
 			goto new_mtu;
@@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
 	u32 dnode, dport;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct sk_buff *skb;
 	struct tipc_name_seq *seq;
 	struct iov_iter save;
@@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
 	}
 
+	skb_queue_head_init(&pktchain);
 	save = m->msg_iter;
 new_mtu:
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
 	if (rc < 0)
 		return rc;
 
 	do {
-		skb = skb_peek(pktchain);
+		skb = skb_peek(&pktchain);
 		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-		rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
 		if (likely(!rc)) {
 			if (sock->state != SS_READY)
 				sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			m->msg_iter = save;
 			goto new_mtu;
@@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	u32 portid = tsk->portid;
 	int rc = -EINVAL;
@@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 
 	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	dnode = tsk_peer_node(tsk);
+	skb_queue_head_init(&pktchain);
 
 next:
 	save = m->msg_iter;
 	mtu = tsk->max_pkt;
 	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-	rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
+
 	do {
 		if (likely(!tsk_conn_cong(tsk))) {
-			rc = tipc_node_xmit(net, pktchain, dnode, portid);
+			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
 			if (likely(!rc)) {
 				tsk->sent_unacked++;
 				sent += send;
@@ -1063,7 +1068,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 				goto next;
 			}
 			if (rc == -EMSGSIZE) {
-				__skb_queue_purge(pktchain);
+				__skb_queue_purge(&pktchain);
 				tsk->max_pkt = tipc_node_get_mtu(net, dnode,
 								 portid);
 				m->msg_iter = save;
@@ -1077,7 +1082,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 		rc = tipc_wait_for_sndpkt(sock, &timeo);
 	} while (!rc);
 
-	__skb_queue_purge(pktchain);
+	__skb_queue_purge(&pktchain);
 	return sent ? sent : rc;
 }
 

From 242fab1419e149ffc64b8b778fa1dabab34ff2ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Tue, 1 Mar 2016 14:31:02 +0100
Subject: [PATCH 344/797] qmi_wwan: add Sierra Wireless EM74xx device ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit bf13c94ccb33c3182efc92ce4989506a0f541243 ]

The MC74xx and EM74xx modules use different IDs by default, according
to the Lenovo EM7455 driver for Windows.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 982e0acd1a36..df77467c7e93 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -718,8 +718,10 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x9061, 8)},	/* Sierra Wireless Modem */
 	{QMI_FIXED_INTF(0x1199, 0x9070, 8)},	/* Sierra Wireless MC74xx/EM74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9070, 10)},	/* Sierra Wireless MC74xx/EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx/EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx/EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */

From b80398d91c2bdb23d503c5d742ba5c4541269112 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 1 Mar 2016 16:15:16 +0100
Subject: [PATCH 345/797] ipv6: re-enable fragment header matching in
 ipv6_find_hdr

[ Upstream commit 5d150a985520bbe3cb2aa1ceef24a7e32f20c15f ]

When ipv6_find_hdr is used to find a fragment header
(caller specifies target NEXTHDR_FRAGMENT) we erroneously return
-ENOENT for all fragments with nonzero offset.

Before commit 9195bb8e381d, when target was specified, we did not
enter the exthdr walk loop as nexthdr == target so this used to work.

Now we do (so we can skip empty route headers). When we then stumble upon
a frag with nonzero frag_off we must return -ENOENT ("header not found")
only if the caller did not specifically request NEXTHDR_FRAGMENT.

This allows the nftables exthdr expression to match ipv6 fragments, e.g. via

nft add rule ip6 filter input frag frag-off gt 0

Fixes: 9195bb8e381d ("ipv6: improve ipv6_find_hdr() to skip empty routing headers")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/exthdrs_core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e59da5..9508a20fbf61 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 						*fragoff = _frag_off;
 					return hp->nexthdr;
 				}
-				return -ENOENT;
+				if (!found)
+					return -ENOENT;
+				if (fragoff)
+					*fragoff = _frag_off;
+				break;
 			}
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH) {

From 32cb6781a96f24287a7b3c8716f47b0e8768709d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 2 Mar 2016 02:32:08 +0100
Subject: [PATCH 346/797] vxlan: fix missing options_len update on RX with
 collect metadata

[ Upstream commit 4024fcf70556311521e7b6cf79fa50e16f31013a ]

When signalling to metadata consumers that the metadata_dst entry
carries additional GBP extension data for vxlan (TUNNEL_VXLAN_OPT),
the dst's vxlan_metadata information is populated, but options_len
is left at zero. For example, in ovs, ovs_flow_key_extract() checks for
options_len before extracting the data through ip_tunnel_info_opts_get().

Geneve uses ip_tunnel_info_opts_set() helper in receive path, which
sets options_len internally, vxlan however uses ip_tunnel_info_opts(),
so when filling vxlan_metadata, we do need to update options_len.

Fixes: 4c22279848c5 ("ip-tunnel: Use API to access tunnel metadata options.")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/vxlan.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e0fcda4ddd55..3c0df70e2f53 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1306,8 +1306,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		gbp = (struct vxlanhdr_gbp *)vxh;
 		md->gbp = ntohs(gbp->policy_id);
 
-		if (tun_dst)
+		if (tun_dst) {
 			tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+			tun_dst->u.tun_info.options_len = sizeof(*md);
+		}
 
 		if (gbp->dont_learn)
 			md->gbp |= VXLAN_GBP_DONT_LEARN;

From 3aaa64b61a787d0aedc5c3fe36102419fc4eeb93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 3 Mar 2016 22:20:53 +0100
Subject: [PATCH 347/797] cdc_ncm: toggle altsetting to force reset before
 setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 48906f62c96cc2cd35753e59310cb70eb08cc6a5 ]

Some devices will silently fail setup unless they are reset first.
This is necessary even if the data interface is already in
altsetting 0, which it will be when the device is probed for the
first time.  Briefly toggling the altsetting forces a function
reset regardless of the initial state.

This fixes a setup problem observed on a number of Huawei devices,
appearing to operate in NTB-32 mode even if we explicitly set them
to NTB-16 mode.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/cdc_ncm.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index e8a1144c5a8b..8c2bb77db049 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -794,7 +794,11 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
 	iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber;
 
-	/* reset data interface */
+	/* Reset data interface. Some devices will not reset properly
+	 * unless they are configured first.  Toggle the altsetting to
+	 * force a reset
+	 */
+	usb_set_interface(dev->udev, iface_no, data_altsetting);
 	temp = usb_set_interface(dev->udev, iface_no, 0);
 	if (temp) {
 		dev_dbg(&intf->dev, "set interface failed\n");

From 2d11623bd01a311c868d2e90fbdda8b5eec39a2f Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 7 Mar 2016 11:31:10 +0100
Subject: [PATCH 348/797] usbnet: cleanup after bind() in probe()

[ Upstream commit 1666984c8625b3db19a9abc298931d35ab7bc64b ]

If bind() works but a later error forces probe() to bail out, deferred
work and a timer may already have been scheduled in the error cases.
They must be killed. This fixes an error case related to
the double free reported in
http://www.spinics.net/lists/netdev/msg367669.html
and needs to go on top of Linus' fix to cdc-ncm.

Signed-off-by: Oliver Neukum <ONeukum@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/usbnet.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 0744bf2ef2d6..c2ea4e5666fb 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1766,6 +1766,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	if (info->unbind)
 		info->unbind (dev, udev);
 out1:
+	/* subdrivers must undo all they did in bind() if they
+	 * fail it, but we may fail later and a deferred kevent
+	 * may trigger an error resubmitting itself and, worse,
+	 * schedule a timer. So we kill it all just in case.
+	 */
+	cancel_work_sync(&dev->kevent);
+	del_timer_sync(&dev->delay);
 	free_netdev(net);
 out:
 	return status;

From 8a2226c17e0e2256f71e6b3175b6d3455b479f02 Mon Sep 17 00:00:00 2001
From: Bill Sommerfeld <wsommerfeld@google.com>
Date: Fri, 4 Mar 2016 14:47:21 -0800
Subject: [PATCH 349/797] udp6: fix UDP/IPv6 encap resubmit path

[ Upstream commit 59dca1d8a6725a121dae6c452de0b2611d5865dc ]

IPv4 interprets a negative return value from a protocol handler as a
request to redispatch to a new protocol.  In contrast, IPv6 interprets a
negative value as an error, and interprets a positive value as a request
for redispatch.

UDP for IPv6 was unaware of this difference.  Change __udp6_lib_rcv() to
return a positive value for redispatch.  Note that the socket's
encap_rcv hook still needs to return a negative value to request
dispatch, and in the case of IPv6 packets, adjust IP6CB(skb)->nhoff to
identify the byte containing the next protocol.

Signed-off-by: Bill Sommerfeld <wsommerfeld@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/udp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9da3287a3923..1e293a552693 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -916,11 +916,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
-		/* a return value > 0 means to resubmit the input, but
-		 * it wants the return to be -protocol, or 0
-		 */
+		/* a return value > 0 means to resubmit the input */
 		if (ret > 0)
-			return -ret;
+			return ret;
 
 		return 0;
 	}

From 36b9c7cc09fc6d90e155f322f3f1d6fd3f53b0b7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 6 Mar 2016 09:29:21 -0800
Subject: [PATCH 350/797] tcp: fix tcpi_segs_in after connection establishment

[ Upstream commit a9d99ce28ed359d68cf6f3c1a69038aefedf6d6a ]

If the final packet (ACK) of the 3WHS is lost, it appears we do not properly
account the following incoming segment into tcpi_segs_in.

While we are at it, start segs_in at one, to count the SYN packet.

We do not yet count number of SYN we received for a request sock, we
might add this someday.

packetdrill script showing proper behavior after fix :

// Tests tcpi_segs_in when 3rd packet (ACK) of 3WHS is lost
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+.020 < P. 1:1001(1000) ack 1 win 32792

   +0 accept(3, ..., ...) = 4

+.000 %{ assert tcpi_segs_in == 2, 'tcpi_segs_in=%d' % tcpi_segs_in }%

Fixes: 2efd055c53c06 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_minisocks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ac6b1961ffeb..9475a2748a9a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->segs_in = 0;
+		newtp->segs_in = 1;
 
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -818,6 +818,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	int ret = 0;
 	int state = child->sk_state;
 
+	tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	if (!sock_owned_by_user(child)) {
 		ret = tcp_rcv_state_process(child, skb);
 		/* Wakeup parent, send SIGIO */

From cd8101d8ece8d776b79b7e0528cc8ba6d6c33562 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Mon, 7 Mar 2016 19:36:44 +0100
Subject: [PATCH 351/797] ppp: release rtnl mutex when interface creation fails

[ Upstream commit 6faac63a6986f29ef39827f460edd3a5ba64ad5c ]

Add missing rtnl_unlock() in the error path of ppp_create_interface().

Fixes: 58a89ecaca53 ("ppp: fix lockdep splat in ppp_dev_uninit()")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ppp/ppp_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 9a863c6a6a33..40b303ed63b7 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2803,6 +2803,7 @@ static struct ppp *ppp_create_interface(struct net *net, int unit,
 
 out2:
 	mutex_unlock(&pn->all_ppp_mutex);
+	rtnl_unlock();
 	free_netdev(dev);
 out1:
 	*retp = ret;

From 8b8d278aa4de9335682bbd4a3bb619af015c859e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 9 Mar 2016 21:58:32 -0500
Subject: [PATCH 352/797] net: validate variable length ll headers

[ Upstream commit 2793a23aacbd754dbbb5cb75093deb7e4103bace ]

Netdevice parameter hard_header_len is variously interpreted both as
an upper and lower bound on link layer header length. The field is
used as upper bound when reserving room at allocation, as lower bound
when validating user input in PF_PACKET.

Clarify the definition to be maximum header length. For validation
of untrusted headers, add an optional validate member to header_ops.

Allow bypassing of validation by passing CAP_SYS_RAWIO, for instance
for deliberate testing of corrupt input. In this case, pad trailing
bytes, as some device drivers expect completely initialized headers.

See also http://comments.gmane.org/gmane.linux.network/401064

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/netdevice.h | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3143c847bddb..04c068e55353 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -265,6 +265,7 @@ struct header_ops {
 	void	(*cache_update)(struct hh_cache *hh,
 				const struct net_device *dev,
 				const unsigned char *haddr);
+	bool	(*validate)(const char *ll_header, unsigned int len);
 };
 
 /* These flag bits are private to the generic network queueing
@@ -1398,8 +1399,7 @@ enum netdev_priv_flags {
  *	@dma:		DMA channel
  *	@mtu:		Interface MTU value
  *	@type:		Interface hardware type
- *	@hard_header_len: Hardware header length, which means that this is the
- *			  minimum size of a packet.
+ *	@hard_header_len: Maximum hardware header length.
  *
  *	@needed_headroom: Extra headroom the hardware may need, but not in all
  *			  cases can this be guaranteed
@@ -2493,6 +2493,24 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 	return dev->header_ops->parse(skb, haddr);
 }
 
+/* ll_header must have at least hard_header_len allocated */
+static inline bool dev_validate_header(const struct net_device *dev,
+				       char *ll_header, int len)
+{
+	if (likely(len >= dev->hard_header_len))
+		return true;
+
+	if (capable(CAP_SYS_RAWIO)) {
+		memset(ll_header + len, 0, dev->hard_header_len - len);
+		return true;
+	}
+
+	if (dev->header_ops && dev->header_ops->validate)
+		return dev->header_ops->validate(ll_header, len);
+
+	return false;
+}
+
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
 int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
 static inline int unregister_gifconf(unsigned int family)

From abd42587771aff48c39312116e8b8db851ea0fa5 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 9 Mar 2016 21:58:33 -0500
Subject: [PATCH 353/797] ax25: add link layer header validation function

[ Upstream commit ea47781c26510e5d97f80f9aceafe9065bd5e3aa ]

As a variable length protocol, AX25 fails link layer header validation
tests based on a minimum length. header_ops.validate allows protocols
to validate headers that are shorter than hard_header_len. Implement
this callback for AX25.

See also http://comments.gmane.org/gmane.linux.network/401064

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ax25/ax25_ip.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index b563a3f5f2a8..2fa3be965101 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
 }
 #endif
 
+static bool ax25_validate_header(const char *header, unsigned int len)
+{
+	ax25_digi digi;
+
+	if (!len)
+		return false;
+
+	if (header[0])
+		return true;
+
+	return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
+			       NULL);
+}
+
 const struct header_ops ax25_header_ops = {
 	.create = ax25_hard_header,
+	.validate = ax25_validate_header,
 };
 
 EXPORT_SYMBOL(ax25_header_ops);

From edb60bc7bb4ee6e3862aa8840a65cce47e09dcfe Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 9 Mar 2016 21:58:34 -0500
Subject: [PATCH 354/797] packet: validate variable length ll headers

[ Upstream commit 9ed988cd591500c040b2a6257bc68543e08ceeef ]

Replace link layer header validation check ll_header_truncate with
more generic dev_validate_header.

Validation based on hard_header_len incorrectly drops valid packets
in variable length protocols, such as AX25. dev_validate_header
calls header_ops.validate for such protocols to ensure correctness
below hard_header_len.

See also http://comments.gmane.org/gmane.linux.network/401064

Fixes: 9c7077622dd9 ("packet: make packet_snd fail on len smaller than l2 header")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396aa635c..da1ae0e13cb5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1916,6 +1916,10 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
 		goto retry;
 	}
 
+	if (!dev_validate_header(dev, skb->data, len)) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
 	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
 	    !packet_extra_vlan_len_allowed(dev, skb)) {
 		err = -EMSGSIZE;
@@ -2326,18 +2330,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
-static bool ll_header_truncated(const struct net_device *dev, int len)
-{
-	/* net device doesn't like empty head */
-	if (unlikely(len < dev->hard_header_len)) {
-		net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
-				     current->comm, len, dev->hard_header_len);
-		return true;
-	}
-
-	return false;
-}
-
 static void tpacket_set_protocol(const struct net_device *dev,
 				 struct sk_buff *skb)
 {
@@ -2420,19 +2412,19 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		if (unlikely(err < 0))
 			return -EINVAL;
 	} else if (dev->hard_header_len) {
-		if (ll_header_truncated(dev, tp_len))
-			return -EINVAL;
+		int hdrlen = min_t(int, dev->hard_header_len, tp_len);
 
 		skb_push(skb, dev->hard_header_len);
-		err = skb_store_bits(skb, 0, data,
-				dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data, hdrlen);
 		if (unlikely(err))
 			return err;
+		if (!dev_validate_header(dev, skb->data, hdrlen))
+			return -EINVAL;
 		if (!skb->protocol)
 			tpacket_set_protocol(dev, skb);
 
-		data += dev->hard_header_len;
-		to_write -= dev->hard_header_len;
+		data += hdrlen;
+		to_write -= hdrlen;
 	}
 
 	offset = offset_in_page(data);
@@ -2763,9 +2755,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
 		if (unlikely(offset < 0))
 			goto out_free;
-	} else {
-		if (ll_header_truncated(dev, len))
-			goto out_free;
 	}
 
 	/* Returns -EFAULT on error */
@@ -2773,6 +2762,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (err)
 		goto out_free;
 
+	if (sock->type == SOCK_RAW &&
+	    !dev_validate_header(dev, skb->data, len)) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 
 	if (!gso_type && (len > dev->mtu + reserve + extra_len) &&

From e8e43232627082328fa4016fab1960360360f167 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 9 Mar 2016 20:02:33 -0800
Subject: [PATCH 355/797] bpf: avoid copying junk bytes in
 bpf_get_current_comm()

[ Upstream commit cdc4e47da8f4c32eeb6b2061a8a834f4362a12b7 ]

Lots of places in the kernel use memcpy(buf, comm, TASK_COMM_LEN); but
the result is typically passed to print("%s", buf) and extra bytes
after zero don't cause any harm.
In bpf the result of bpf_get_current_comm() is used as the part of
map key and was causing spurious hash map mismatches.
Use strlcpy() to guarantee zero-terminated string.
The bpf verifier checks that the output buffer is zero-initialized,
so even for short task names the output buffer doesn't have junk bytes.
Note it's not a security concern, since kprobe+bpf is root only.

Fixes: ffeedafbf023 ("bpf: introduce current->pid, tgid, uid, gid, comm accessors")
Reported-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4504ca66118d..50da680c479f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -166,7 +166,7 @@ static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
 	if (!task)
 		return -EINVAL;
 
-	memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
+	strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
 	return 0;
 }
 

From a95fc0f757728d08acae89c6194f8ea9e89ec3fe Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Tue, 8 Mar 2016 01:36:28 +0300
Subject: [PATCH 356/797] sh_eth: fix NULL pointer dereference in
 sh_eth_ring_format()

[ Upstream commit c1b7fca65070bfadca94dd53a4e6b71cd4f69715 ]

In a low memory situation, if netdev_alloc_skb() fails on the first RX ring
loop iteration in sh_eth_ring_format(), 'rxdesc' is still NULL.  Avoid a
kernel oops by adding the 'rxdesc' check after the loop.

Reported-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/renesas/sh_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 6a8fc0f341ff..a1702f4ac6ff 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1212,7 +1212,8 @@ static void sh_eth_ring_format(struct net_device *ndev)
 	mdp->dirty_rx = (u32) (i - mdp->num_rx_ring);
 
 	/* Mark the last entry as wrapping the ring. */
-	rxdesc->status |= cpu_to_edmac(mdp, RD_RDLE);
+	if (rxdesc)
+		rxdesc->status |= cpu_to_edmac(mdp, RD_RDLE);
 
 	memset(mdp->tx_ring, 0, tx_ringsize);
 

From 8352a292fe92ae2397b60701495b576e9afbc012 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Tue, 8 Mar 2016 01:37:09 +0300
Subject: [PATCH 357/797] sh_eth: advance 'rxdesc' later in
 sh_eth_ring_format()

[ Upstream commit d0ba913488dc8c55d1880f5ed34f096dc45fb05d ]

Iff dma_map_single() fails, 'rxdesc'  should point  to the last filled RX
descriptor, so  that it can be marked as the last one, however the driver
would have  already  advanced it by that time. In order to fix that, only
fill  an RX descriptor  once all the data for it is ready.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/renesas/sh_eth.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a1702f4ac6ff..36fc9427418f 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1185,11 +1185,8 @@ static void sh_eth_ring_format(struct net_device *ndev)
 			break;
 		sh_eth_set_receive_align(skb);
 
-		/* RX descriptor */
-		rxdesc = &mdp->rx_ring[i];
 		/* The size of the buffer is a multiple of 32 bytes. */
 		buf_len = ALIGN(mdp->rx_buf_sz, 32);
-		rxdesc->len = cpu_to_edmac(mdp, buf_len << 16);
 		dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len,
 					  DMA_FROM_DEVICE);
 		if (dma_mapping_error(&ndev->dev, dma_addr)) {
@@ -1197,6 +1194,10 @@ static void sh_eth_ring_format(struct net_device *ndev)
 			break;
 		}
 		mdp->rx_skbuff[i] = skb;
+
+		/* RX descriptor */
+		rxdesc = &mdp->rx_ring[i];
+		rxdesc->len = cpu_to_edmac(mdp, buf_len << 16);
 		rxdesc->addr = cpu_to_edmac(mdp, dma_addr);
 		rxdesc->status = cpu_to_edmac(mdp, RD_RACT | RD_RFP);
 

From 12dd6d869b22ae114f81962e346cd5428b358b72 Mon Sep 17 00:00:00 2001
From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Tue, 8 Mar 2016 02:39:57 -0500
Subject: [PATCH 358/797] qlcnic: Remove unnecessary usage of atomic_t

[ Upstream commit 5bf93251cee1fb66141d1d2eaff86e04a9397bdf ]

o atomic_t usage is incorrect as we are not implementing
any atomicity.

Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h         | 2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 46bbea8e023c..d18667b1b5b7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1099,7 +1099,7 @@ struct qlcnic_mailbox {
 	unsigned long		status;
 	spinlock_t		queue_lock;	/* Mailbox queue lock */
 	spinlock_t		aen_lock;	/* Mailbox response/AEN lock */
-	atomic_t		rsp_status;
+	u32			rsp_status;
 	u32			num_cmds;
 };
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 37a731be7d39..e3d1bb722903 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -491,7 +491,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter)
 
 static inline void qlcnic_83xx_notify_mbx_response(struct qlcnic_mailbox *mbx)
 {
-	atomic_set(&mbx->rsp_status, QLC_83XX_MBX_RESPONSE_ARRIVED);
+	mbx->rsp_status = QLC_83XX_MBX_RESPONSE_ARRIVED;
 	complete(&mbx->completion);
 }
 
@@ -510,7 +510,7 @@ static void qlcnic_83xx_poll_process_aen(struct qlcnic_adapter *adapter)
 	if (event &  QLCNIC_MBX_ASYNC_EVENT) {
 		__qlcnic_83xx_process_aen(adapter);
 	} else {
-		if (atomic_read(&mbx->rsp_status) != rsp_status)
+		if (mbx->rsp_status != rsp_status)
 			qlcnic_83xx_notify_mbx_response(mbx);
 	}
 out:
@@ -1023,7 +1023,7 @@ static void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter)
 		if (event &  QLCNIC_MBX_ASYNC_EVENT) {
 			__qlcnic_83xx_process_aen(adapter);
 		} else {
-			if (atomic_read(&mbx->rsp_status) != rsp_status)
+			if (mbx->rsp_status != rsp_status)
 				qlcnic_83xx_notify_mbx_response(mbx);
 		}
 	}
@@ -4050,7 +4050,6 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
 	struct qlcnic_adapter *adapter = mbx->adapter;
 	const struct qlcnic_mbx_ops *mbx_ops = mbx->ops;
 	struct device *dev = &adapter->pdev->dev;
-	atomic_t *rsp_status = &mbx->rsp_status;
 	struct list_head *head = &mbx->cmd_q;
 	struct qlcnic_hardware_context *ahw;
 	struct qlcnic_cmd_args *cmd = NULL;
@@ -4063,7 +4062,7 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
 			return;
 		}
 
-		atomic_set(rsp_status, QLC_83XX_MBX_RESPONSE_WAIT);
+		mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT;
 
 		spin_lock(&mbx->queue_lock);
 

From b39af5aa111dd05472c8b1b0d3f114f4e34f61fe Mon Sep 17 00:00:00 2001
From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Tue, 8 Mar 2016 02:39:58 -0500
Subject: [PATCH 359/797] qlcnic: Fix mailbox completion handling during
 spurious interrupt

[ Upstream commit 819bfe764dceec2f6b4551768453f374b4c60443 ]

o While the driver is in the middle of a MB completion processing
and it receives a spurious MB interrupt, it is mistaken as a good MB
completion interrupt leading to premature completion of the next MB
request. Fix the driver to guard against this by checking the current
state of MB processing and ignore the spurious interrupt.
Also added a stats counter to record this condition.

Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h       |  1 +
 .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c   | 15 +++++++++++----
 .../net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c   |  3 ++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index d18667b1b5b7..55007f1e6bbc 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -566,6 +566,7 @@ struct qlcnic_adapter_stats {
 	u64  tx_dma_map_error;
 	u64  spurious_intr;
 	u64  mac_filter_limit_overrun;
+	u64  mbx_spurious_intr;
 };
 
 /*
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index e3d1bb722903..f9640d5ce6ba 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -2338,9 +2338,9 @@ static void qlcnic_83xx_handle_link_aen(struct qlcnic_adapter *adapter,
 
 static irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data)
 {
+	u32 mask, resp, event, rsp_status = QLC_83XX_MBX_RESPONSE_ARRIVED;
 	struct qlcnic_adapter *adapter = data;
 	struct qlcnic_mailbox *mbx;
-	u32 mask, resp, event;
 	unsigned long flags;
 
 	mbx = adapter->ahw->mailbox;
@@ -2350,10 +2350,14 @@ static irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data)
 		goto out;
 
 	event = readl(QLCNIC_MBX_FW(adapter->ahw, 0));
-	if (event &  QLCNIC_MBX_ASYNC_EVENT)
+	if (event &  QLCNIC_MBX_ASYNC_EVENT) {
 		__qlcnic_83xx_process_aen(adapter);
-	else
-		qlcnic_83xx_notify_mbx_response(mbx);
+	} else {
+		if (mbx->rsp_status != rsp_status)
+			qlcnic_83xx_notify_mbx_response(mbx);
+		else
+			adapter->stats.mbx_spurious_intr++;
+	}
 
 out:
 	mask = QLCRDX(adapter->ahw, QLCNIC_DEF_INT_MASK);
@@ -4053,6 +4057,7 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
 	struct list_head *head = &mbx->cmd_q;
 	struct qlcnic_hardware_context *ahw;
 	struct qlcnic_cmd_args *cmd = NULL;
+	unsigned long flags;
 
 	ahw = adapter->ahw;
 
@@ -4062,7 +4067,9 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
 			return;
 		}
 
+		spin_lock_irqsave(&mbx->aen_lock, flags);
 		mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT;
+		spin_unlock_irqrestore(&mbx->aen_lock, flags);
 
 		spin_lock(&mbx->queue_lock);
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 494e8105adee..0a2318cad34d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -59,7 +59,8 @@ static const struct qlcnic_stats qlcnic_gstrings_stats[] = {
 	 QLC_OFF(stats.mac_filter_limit_overrun)},
 	{"spurious intr", QLC_SIZEOF(stats.spurious_intr),
 	 QLC_OFF(stats.spurious_intr)},
-
+	{"mbx spurious intr", QLC_SIZEOF(stats.mbx_spurious_intr),
+	 QLC_OFF(stats.mbx_spurious_intr)},
 };
 
 static const char qlcnic_device_gstrings_stats[][ETH_GSTRING_LEN] = {

From a96f3553d5d1d6650f608f14162ed403d64e2b66 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 8 Mar 2016 15:18:54 -0500
Subject: [PATCH 360/797] macvtap: always pass ethernet header in linear

[ Upstream commit 8e2ad4113ce4671686740f808ff2795395c39eef ]

The stack expects link layer headers in the skb linear section.
Macvtap can create skbs with llheader in frags in edge cases:
when (IFF_VNET_HDR is off or vnet_hdr.hdr_len < ETH_HLEN) and
prepad + len > PAGE_SIZE and vnet_hdr.flags has no or bad csum.

Add checks to ensure linear is always at least ETH_HLEN.
At this point, len is already ensured to be >= ETH_HLEN.

For backwards compatibility, round up short vnet_hdr.hdr_len.
This differs from tap and packet, which return an error.

Fixes: b9fb9ee07e67 ("macvtap: add GSO/csum offload support")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/macvtap.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0fc521941c71..159a68782bec 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -760,6 +760,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 			macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
 		if (copylen > good_linear)
 			copylen = good_linear;
+		else if (copylen < ETH_HLEN)
+			copylen = ETH_HLEN;
 		linear = copylen;
 		i = *from;
 		iov_iter_advance(&i, copylen);
@@ -769,10 +771,11 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 
 	if (!zerocopy) {
 		copylen = len;
-		if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > good_linear)
+		linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len);
+		if (linear > good_linear)
 			linear = good_linear;
-		else
-			linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len);
+		else if (linear < ETH_HLEN)
+			linear = ETH_HLEN;
 	}
 
 	skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen,

From 7d870cff8ece6088dd9e26f54a3fd1b4b899ddf9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 8 Mar 2016 12:59:33 -0800
Subject: [PATCH 361/797] mlxsw: spectrum: Check requested ageing time is valid

[ Upstream commit 869f63a4d28144c03c8f4a4c0d1e8f31f8c11a10 ]

Commit c62987bbd8a1 ("bridge: push bridge setting ageing_time down to
switchdev") added a check for minimum and maximum ageing time, but this
breaks existing behaviour where one can set ageing time to 0 for a
non-learning bridge.

Push this check down to the driver and allow the check in the bridge
layer to be removed. Currently ageing time 0 is refused by the driver,
but we can later add support for this functionality.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h           | 2 ++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4365c8bccc6d..605f6410f867 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -61,6 +61,8 @@ struct mlxsw_sp {
 #define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100
 		unsigned int interval; /* ms */
 	} fdb_notify;
+#define MLXSW_SP_MIN_AGEING_TIME 10
+#define MLXSW_SP_MAX_AGEING_TIME 1000000
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
 	u32 ageing_time;
 	struct {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7dbeafa65934..d4c4c2b5156c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -232,8 +232,13 @@ static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t);
 	u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000;
 
-	if (switchdev_trans_ph_prepare(trans))
-		return 0;
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (ageing_time < MLXSW_SP_MIN_AGEING_TIME ||
+		    ageing_time > MLXSW_SP_MAX_AGEING_TIME)
+			return -ERANGE;
+		else
+			return 0;
+	}
 
 	return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time);
 }

From c3d8f507e7fedeeab81bd9dafa2d63d82be159a3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 8 Mar 2016 12:59:34 -0800
Subject: [PATCH 362/797] rocker: set FDB cleanup timer according to lowest
 ageing time

[ Upstream commit 88de1cd457e5cb664d6d437e2ea4750d089165f5 ]

In rocker, ageing time is a per-port attribute, so the next time the FDB
cleanup timer fires should be set according to the lowest ageing time.

This will later allow us to delete the BR_MIN_AGEING_TIME macro, which was
added to guarantee minimum ageing time in the bridge layer, thereby breaking
existing behavior.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/rocker/rocker.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 52ec3d6e056a..2b34622a4bfe 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -239,6 +239,7 @@ struct rocker {
 	struct {
 		u64 id;
 	} hw;
+	unsigned long ageing_time;
 	spinlock_t cmd_ring_lock;		/* for cmd ring accesses */
 	struct rocker_dma_ring_info cmd_ring;
 	struct rocker_dma_ring_info event_ring;
@@ -3704,7 +3705,7 @@ static void rocker_fdb_cleanup(unsigned long data)
 	struct rocker_port *rocker_port;
 	struct rocker_fdb_tbl_entry *entry;
 	struct hlist_node *tmp;
-	unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME;
+	unsigned long next_timer = jiffies + rocker->ageing_time;
 	unsigned long expires;
 	unsigned long lock_flags;
 	int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE |
@@ -4367,8 +4368,12 @@ static int rocker_port_bridge_ageing_time(struct rocker_port *rocker_port,
 					  struct switchdev_trans *trans,
 					  u32 ageing_time)
 {
+	struct rocker *rocker = rocker_port->rocker;
+
 	if (!switchdev_trans_ph_prepare(trans)) {
 		rocker_port->ageing_time = clock_t_to_jiffies(ageing_time);
+		if (rocker_port->ageing_time < rocker->ageing_time)
+			rocker->ageing_time = rocker_port->ageing_time;
 		mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies);
 	}
 
@@ -5206,10 +5211,13 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_init_tbls;
 	}
 
+	rocker->ageing_time = BR_DEFAULT_AGEING_TIME;
 	setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup,
 		    (unsigned long) rocker);
 	mod_timer(&rocker->fdb_cleanup_timer, jiffies);
 
+	rocker->ageing_time = BR_DEFAULT_AGEING_TIME;
+
 	err = rocker_probe_ports(rocker);
 	if (err) {
 		dev_err(&pdev->dev, "failed to probe ports\n");

From acbea202fbba11c52df2fd4040c19bb796fd37fa Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemming@brocade.com>
Date: Tue, 8 Mar 2016 12:59:35 -0800
Subject: [PATCH 363/797] bridge: allow zero ageing time

[ Upstream commit 4c656c13b254d598e83e586b7b4d36a2043dad85 ]

This fixes a regression in the bridge ageing time caused by:
commit c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev")

There are users of Linux bridge which use the feature that if ageing time
is set to 0 it causes entries to never expire. See:
  https://www.linuxfoundation.org/collaborate/workgroups/networking/bridge

For a pure software bridge, it is unnecessary for the code to have
arbitrary restrictions on what values are allowable.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/if_bridge.h |  4 ----
 net/bridge/br_stp.c       | 11 ++++++++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index a338a688ee4a..dcb89e3515db 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -46,10 +46,6 @@ struct br_ip_list {
 #define BR_LEARNING_SYNC	BIT(9)
 #define BR_PROXYARP_WIFI	BIT(10)
 
-/* values as per ieee8021QBridgeFdbAgingTime */
-#define BR_MIN_AGEING_TIME	(10 * HZ)
-#define BR_MAX_AGEING_TIME	(1000000 * HZ)
-
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 5f3f64553179..0e658f47a5da 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -567,6 +567,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
 
 }
 
+/* Set time interval that dynamic forwarding entries live
+ * For pure software bridge, allow values outside the 802.1
+ * standard specification for special cases:
+ *  0 - entry never ages (all permanant)
+ *  1 - entry disappears (no persistance)
+ *
+ * Offloaded switch entries maybe more restrictive
+ */
 int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
 {
 	struct switchdev_attr attr = {
@@ -577,9 +585,6 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
 	unsigned long t = clock_t_to_jiffies(ageing_time);
 	int err;
 
-	if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
-		return -ERANGE;
-
 	err = switchdev_port_attr_set(br->dev, &attr);
 	if (err)
 		return err;

From 54789759917f127cfadcca730f44ea67d557a9b0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 13 Mar 2016 23:28:00 -0400
Subject: [PATCH 364/797] ipv4: Don't do expensive useless work during inetdev
 destroy.

[ Upstream commit fbd40ea0180a2d328c5adc61414dc8bab9335ce2 ]

When an inetdev is destroyed, every address assigned to the interface
is removed.  And in this scenario we do two pointless things which can
be very expensive if the number of assigned interfaces is large:

1) Address promotion.  We are deleting all addresses, so there is no
   point in doing this.

2) A full nf conntrack table purge for every address.  We only need to
   do this once, as is already caught by the existing
   masq_dev_notifier so masq_inet_event() can skip this.

Reported-by: Solar Designer <solar@openwall.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/devinet.c                          |  4 ++++
 net/ipv4/fib_frontend.c                     |  4 ++++
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 12 ++++++++++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f6303b17546b..0212591b0077 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 
 	ASSERT_RTNL();
 
+	if (in_dev->dead)
+		goto no_promotions;
+
 	/* 1. Deleting primary ifaddr forces deletion all secondaries
 	 * unless alias promotion is set
 	 **/
@@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			fib_del_ifaddr(ifa, ifa1);
 	}
 
+no_promotions:
 	/* 2. Unlink it */
 
 	*ifap = ifa1->ifa_next;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 473447593060..21add552e56a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -922,6 +922,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 		subnet = 1;
 	}
 
+	if (in_dev->dead)
+		goto no_promotions;
+
 	/* Deletion is more complicated than add.
 	 * We should take care of not to delete too much :-)
 	 *
@@ -997,6 +1000,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 		}
 	}
 
+no_promotions:
 	if (!(ok & BRD_OK))
 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 	if (subnet && ifa->ifa_prefixlen < 31) {
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c6eb42100e9a..ea91058b5f6f 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this,
 			   unsigned long event,
 			   void *ptr)
 {
-	struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
 	struct netdev_notifier_info info;
 
-	netdev_notifier_info_init(&info, dev);
+	/* The masq_dev_notifier will catch the case of the device going
+	 * down.  So if the inetdev is dead and being destroyed we have
+	 * no work to do.  Otherwise this is an individual address removal
+	 * and we have to perform the flush.
+	 */
+	if (idev->dead)
+		return NOTIFY_DONE;
+
+	netdev_notifier_info_init(&info, idev->dev);
 	return masq_device_event(this, event, &info);
 }
 

From 405f10a39443ae9ccacf51f18511dfc827e09108 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 14 Mar 2016 09:56:35 -0300
Subject: [PATCH 365/797] net: Fix use after free in the recvmmsg exit path

[ Upstream commit 34b88a68f26a75e4fded796f1a49c40f82234b7d ]

The syzkaller fuzzer hit the following use-after-free:

  Call Trace:
   [<ffffffff8175ea0e>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295
   [<ffffffff851cc31a>] __sys_recvmmsg+0x6fa/0x7f0 net/socket.c:2261
   [<     inline     >] SYSC_recvmmsg net/socket.c:2281
   [<ffffffff851cc57f>] SyS_recvmmsg+0x16f/0x180 net/socket.c:2270
   [<ffffffff86332bb6>] entry_SYSCALL_64_fastpath+0x16/0x7a
  arch/x86/entry/entry_64.S:185

And, as Dmitry rightly assessed, that is because we can drop the
reference and then touch it when the underlying recvmsg calls return
some packets and then hit an error, which makes recvmmsg set
sock->sk->sk_err after the reference has been dropped.  Fix it by
recording the error before dropping the reference.

Reported-and-Tested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Fixes: a2e2725541fa ("net: Introduce recvmmsg socket syscall")
http://lkml.kernel.org/r/20160122211644.GC2470@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/socket.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index d730ef9dfbf0..263b334ec5e4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2238,31 +2238,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 			break;
 	}
 
+	if (err == 0)
+		goto out_put;
+
+	if (datagrams == 0) {
+		datagrams = err;
+		goto out_put;
+	}
+
+	/*
+	 * We may return less entries than requested (vlen) if the
+	 * sock is non block and there aren't enough datagrams...
+	 */
+	if (err != -EAGAIN) {
+		/*
+		 * ... or  if recvmsg returns an error after we
+		 * received some datagrams, where we record the
+		 * error to return on the next call or if the
+		 * app asks about it using getsockopt(SO_ERROR).
+		 */
+		sock->sk->sk_err = -err;
+	}
 out_put:
 	fput_light(sock->file, fput_needed);
 
-	if (err == 0)
-		return datagrams;
-
-	if (datagrams != 0) {
-		/*
-		 * We may return less entries than requested (vlen) if the
-		 * sock is non block and there aren't enough datagrams...
-		 */
-		if (err != -EAGAIN) {
-			/*
-			 * ... or  if recvmsg returns an error after we
-			 * received some datagrams, where we record the
-			 * error to return on the next call or if the
-			 * app asks about it using getsockopt(SO_ERROR).
-			 */
-			sock->sk->sk_err = -err;
-		}
-
-		return datagrams;
-	}
-
-	return err;
+	return datagrams;
 }
 
 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,

From 13684fe9dc61c38b4241474ea4f9e28a59c9518c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:18:34 +0100
Subject: [PATCH 366/797] mlx4: add missing braces in verify_qp_parameters

[ Upstream commit baefd7015cdb304ce6c94f9679d0486c71954766 ]

The implementation of QP paravirtualization back in linux-3.7 included
some code that looks very dubious, and gcc-6 has grown smart enough
to warn about it:

drivers/net/ethernet/mellanox/mlx4/resource_tracker.c: In function 'verify_qp_parameters':
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:3154:5: error: statement is indented as if it were guarded by... [-Werror=misleading-indentation]
     if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
     ^~
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:3144:4: note: ...this 'if' clause, but it is not
    if (slave != mlx4_master_func_num(dev))

>From looking at the context, I'm reasonably sure that the indentation
is correct but that it should have contained curly braces from the
start, as the update_gid() function in the same patch correctly does.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 54679e148287 ("mlx4: Implement QP paravirtualization and maintain phys_pkey_cache for smp_snoop")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index cad6c44df91c..d314d96dcb1c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -3132,7 +3132,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 		case QP_TRANS_RTS2RTS:
 		case QP_TRANS_SQD2SQD:
 		case QP_TRANS_SQD2RTS:
-			if (slave != mlx4_master_func_num(dev))
+			if (slave != mlx4_master_func_num(dev)) {
 				if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
 					port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
 					if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
@@ -3151,6 +3151,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 					if (qp_ctx->alt_path.mgid_index >= num_gids)
 						return -EINVAL;
 				}
+			}
 			break;
 		default:
 			break;

From 6e6ede49a9bdc8e6762216fe1760c4183791676c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:18:35 +0100
Subject: [PATCH 367/797] farsync: fix off-by-one bug in fst_add_one

[ Upstream commit e725a66c0202b5f36c2f9d59d26a65c53bbf21f7 ]

gcc-6 finds an out of bounds access in the fst_add_one function
when calculating the end of the mmio area:

drivers/net/wan/farsync.c: In function 'fst_add_one':
drivers/net/wan/farsync.c:418:53: error: index 2 denotes an offset greater than size of 'u8[2][8192] {aka unsigned char[2][8192]}' [-Werror=array-bounds]
 #define BUF_OFFSET(X)   (BFM_BASE + offsetof(struct buf_window, X))
                                                     ^
include/linux/compiler-gcc.h:158:21: note: in definition of macro '__compiler_offsetof'
  __builtin_offsetof(a, b)
                     ^
drivers/net/wan/farsync.c:418:37: note: in expansion of macro 'offsetof'
 #define BUF_OFFSET(X)   (BFM_BASE + offsetof(struct buf_window, X))
                                     ^~~~~~~~
drivers/net/wan/farsync.c:2519:36: note: in expansion of macro 'BUF_OFFSET'
                                  + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER][0]);
                                    ^~~~~~~~~~

The warning is correct, but not critical because this appears
to be a write-only variable that is set by each WAN driver but
never accessed afterwards.

I'm taking the minimal fix here, using the correct pointer by
pointing 'mem_end' to the last byte inside of the register area
as all other WAN drivers do, rather than the first byte outside of
it. An alternative would be to just remove the mem_end member
entirely.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wan/farsync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 44541dbc5c28..69b994f3b8c5 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -2516,7 +2516,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                 dev->mem_start   = card->phys_mem
                                  + BUF_OFFSET ( txBuffer[i][0][0]);
                 dev->mem_end     = card->phys_mem
-                                 + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER][0]);
+                                 + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER - 1][LEN_RX_BUFFER - 1]);
                 dev->base_addr   = card->pci_conf;
                 dev->irq         = card->irq;
 

From a317579bb62ec6c1cb6bd7e5d0d8a25a746832f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:18:36 +0100
Subject: [PATCH 368/797] ath9k: fix buffer overrun for ar9287

[ Upstream commit 83d6f1f15f8cce844b0a131cbc63e444620e48b5 ]

Code that was added back in 2.6.38 has an obvious overflow
when accessing a static array, and at the time it was added
only a code comment was put in front of it as a reminder
to have it reviewed properly.

This has not happened, but gcc-6 now points to the specific
overflow:

drivers/net/wireless/ath/ath9k/eeprom.c: In function 'ath9k_hw_get_gain_boundaries_pdadcs':
drivers/net/wireless/ath/ath9k/eeprom.c:483:44: error: array subscript is above array bounds [-Werror=array-bounds]
     maxPwrT4[i] = data_9287[idxL].pwrPdg[i][4];
                   ~~~~~~~~~~~~~~~~~~~~~~~~~^~~

It turns out that the correct array length exists in the local
'intercepts' variable of this function, so we can just use that
instead of hardcoding '4', so this patch changes all three
instances to use that variable. The other two instances were
already correct, but it's more consistent this way.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 940cd2c12ebf ("ath9k_hw: merge the ar9287 version of ath9k_hw_get_gain_boundaries_pdadcs")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath9k/eeprom.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c
index cc81482c934d..113a43fca9cf 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/eeprom.c
@@ -403,10 +403,9 @@ void ath9k_hw_get_gain_boundaries_pdadcs(struct ath_hw *ah,
 
 	if (match) {
 		if (AR_SREV_9287(ah)) {
-			/* FIXME: array overrun? */
 			for (i = 0; i < numXpdGains; i++) {
 				minPwrT4[i] = data_9287[idxL].pwrPdg[i][0];
-				maxPwrT4[i] = data_9287[idxL].pwrPdg[i][4];
+				maxPwrT4[i] = data_9287[idxL].pwrPdg[i][intercepts - 1];
 				ath9k_hw_fill_vpd_table(minPwrT4[i], maxPwrT4[i],
 						data_9287[idxL].pwrPdg[i],
 						data_9287[idxL].vpdPdg[i],
@@ -416,7 +415,7 @@ void ath9k_hw_get_gain_boundaries_pdadcs(struct ath_hw *ah,
 		} else if (eeprom_4k) {
 			for (i = 0; i < numXpdGains; i++) {
 				minPwrT4[i] = data_4k[idxL].pwrPdg[i][0];
-				maxPwrT4[i] = data_4k[idxL].pwrPdg[i][4];
+				maxPwrT4[i] = data_4k[idxL].pwrPdg[i][intercepts - 1];
 				ath9k_hw_fill_vpd_table(minPwrT4[i], maxPwrT4[i],
 						data_4k[idxL].pwrPdg[i],
 						data_4k[idxL].vpdPdg[i],
@@ -426,7 +425,7 @@ void ath9k_hw_get_gain_boundaries_pdadcs(struct ath_hw *ah,
 		} else {
 			for (i = 0; i < numXpdGains; i++) {
 				minPwrT4[i] = data_def[idxL].pwrPdg[i][0];
-				maxPwrT4[i] = data_def[idxL].pwrPdg[i][4];
+				maxPwrT4[i] = data_def[idxL].pwrPdg[i][intercepts - 1];
 				ath9k_hw_fill_vpd_table(minPwrT4[i], maxPwrT4[i],
 						data_def[idxL].pwrPdg[i],
 						data_def[idxL].vpdPdg[i],

From 029464a380858e54ab750a5a536a0bdcd7180b1f Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Mon, 14 Mar 2016 21:17:16 +0100
Subject: [PATCH 369/797] ppp: ensure file->private_data can't be overridden

[ Upstream commit e8e56ffd9d2973398b60ece1f1bebb8d67b4d032 ]

Locking ppp_mutex must be done before dereferencing file->private_data,
otherwise it could be modified before ppp_unattached_ioctl() takes the
lock. This could lead ppp_unattached_ioctl() to override ->private_data,
thus leaking reference to the ppp_file previously pointed to.

v2: lock all ppp_ioctl() instead of just checking private_data in
    ppp_unattached_ioctl(), to avoid ambiguous behaviour.

Fixes: f3ff8a4d80e8 ("ppp: push BKL down into the driver")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ppp/ppp_generic.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 40b303ed63b7..35e8b5a6fd93 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -567,7 +567,7 @@ static int get_filter(void __user *arg, struct sock_filter **p)
 
 static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct ppp_file *pf = file->private_data;
+	struct ppp_file *pf;
 	struct ppp *ppp;
 	int err = -EFAULT, val, val2, i;
 	struct ppp_idle idle;
@@ -577,9 +577,14 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 	int __user *p = argp;
 
-	if (!pf)
-		return ppp_unattached_ioctl(current->nsproxy->net_ns,
-					pf, file, cmd, arg);
+	mutex_lock(&ppp_mutex);
+
+	pf = file->private_data;
+	if (!pf) {
+		err = ppp_unattached_ioctl(current->nsproxy->net_ns,
+					   pf, file, cmd, arg);
+		goto out;
+	}
 
 	if (cmd == PPPIOCDETACH) {
 		/*
@@ -594,7 +599,6 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		 * this fd and reopening /dev/ppp.
 		 */
 		err = -EINVAL;
-		mutex_lock(&ppp_mutex);
 		if (pf->kind == INTERFACE) {
 			ppp = PF_TO_PPP(pf);
 			rtnl_lock();
@@ -608,15 +612,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		} else
 			pr_warn("PPPIOCDETACH file->f_count=%ld\n",
 				atomic_long_read(&file->f_count));
-		mutex_unlock(&ppp_mutex);
-		return err;
+		goto out;
 	}
 
 	if (pf->kind == CHANNEL) {
 		struct channel *pch;
 		struct ppp_channel *chan;
 
-		mutex_lock(&ppp_mutex);
 		pch = PF_TO_CHANNEL(pf);
 
 		switch (cmd) {
@@ -638,17 +640,16 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 				err = chan->ops->ioctl(chan, cmd, arg);
 			up_read(&pch->chan_sem);
 		}
-		mutex_unlock(&ppp_mutex);
-		return err;
+		goto out;
 	}
 
 	if (pf->kind != INTERFACE) {
 		/* can't happen */
 		pr_err("PPP: not interface or channel??\n");
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
-	mutex_lock(&ppp_mutex);
 	ppp = PF_TO_PPP(pf);
 	switch (cmd) {
 	case PPPIOCSMRU:
@@ -823,7 +824,10 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	default:
 		err = -ENOTTY;
 	}
+
+out:
 	mutex_unlock(&ppp_mutex);
+
 	return err;
 }
 
@@ -836,7 +840,6 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 	struct ppp_net *pn;
 	int __user *p = (int __user *)arg;
 
-	mutex_lock(&ppp_mutex);
 	switch (cmd) {
 	case PPPIOCNEWUNIT:
 		/* Create a new ppp unit */
@@ -886,7 +889,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 	default:
 		err = -ENOTTY;
 	}
-	mutex_unlock(&ppp_mutex);
+
 	return err;
 }
 

From bd33d14acf43bdb040f203555b13765cd2b23d9e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 16 Mar 2016 22:52:15 -0700
Subject: [PATCH 370/797] tcp/dccp: remove obsolete WARN_ON() in icmp handlers

[ Upstream commit e316ea62e3203d524ff0239a40c56d3a39ad1b5c ]

Now that SYN_RECV request sockets are installed in the ehash table, an
ICMP handler can find a request socket while another cpu handles an
incoming packet that transforms this SYN_RECV request socket into an
ESTABLISHED socket.

We need to remove the now obsolete WARN_ON(req->sk), since req->sk
is set when a new child is created and added into listener accept queue.

If this race happens, the ICMP will do nothing special.

Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Ben Lazarus <blazarus@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dccp/ipv4.c     | 2 --
 net/ipv4/tcp_ipv4.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 902d606324a0..8be8f27bfacc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
 	 * ICMPs are not backlogged, hence we cannot get an established
 	 * socket here.
 	 */
-	WARN_ON(req->sk);
-
 	if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 	} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8c7e63163e92..048418b049d8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -320,8 +320,6 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 	/* ICMPs are not backlogged, hence we cannot get
 	 * an established socket here.
 	 */
-	WARN_ON(req->sk);
-
 	if (seq != tcp_rsk(req)->snt_isn) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 	} else if (abort) {

From a5ce25f61bce0ffb1ff59071c06b948277c90a28 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@qlogic.com>
Date: Tue, 15 Mar 2016 07:13:45 -0400
Subject: [PATCH 371/797] qlge: Fix receive packets drop.

[ Upstream commit 2c9a266afefe137bff06bbe0fc48b4d3b3cb348c ]

When running traffic with small packets [length < 256 bytes], packets
were being dropped due to invalid data in the packets delivered by the
driver up to the stack. Using pci_dma_sync_single_for_cpu() ensures that
the latest data is copied into the skb from the receive buffer.

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlge/qlge_main.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 997976426799..b28e73ea2c25 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -1648,7 +1648,18 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev,
 		return;
 	}
 	skb_reserve(new_skb, NET_IP_ALIGN);
+
+	pci_dma_sync_single_for_cpu(qdev->pdev,
+				    dma_unmap_addr(sbq_desc, mapaddr),
+				    dma_unmap_len(sbq_desc, maplen),
+				    PCI_DMA_FROMDEVICE);
+
 	memcpy(skb_put(new_skb, length), skb->data, length);
+
+	pci_dma_sync_single_for_device(qdev->pdev,
+				       dma_unmap_addr(sbq_desc, mapaddr),
+				       dma_unmap_len(sbq_desc, maplen),
+				       PCI_DMA_FROMDEVICE);
 	skb = new_skb;
 
 	/* Frame error, so drop the packet. */

From 7a0e9a08642993bfde0dd03a5a3f825869cc4d06 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 Mar 2016 11:57:06 -0700
Subject: [PATCH 372/797] net: bcmgenet: fix dma api length mismatch

[ Upstream commit eee577232203842b4dcadb7ab477a298479633ed ]

When un-mapping skb->data in __bcmgenet_tx_reclaim(),
we must use the length that was used in the original dma_map_single(),
instead of skb->len, which might be bigger (it includes the frags).

We can simply store skb_len in tx_cb_ptr->dma_len and use it
at unmap time.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 17f017ab4dac..0fb3f8de88e9 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1197,7 +1197,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 			dev->stats.tx_bytes += tx_cb_ptr->skb->len;
 			dma_unmap_single(&dev->dev,
 					 dma_unmap_addr(tx_cb_ptr, dma_addr),
-					 tx_cb_ptr->skb->len,
+					 dma_unmap_len(tx_cb_ptr, dma_len),
 					 DMA_TO_DEVICE);
 			bcmgenet_free_cb(tx_cb_ptr);
 		} else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
@@ -1308,7 +1308,7 @@ static int bcmgenet_xmit_single(struct net_device *dev,
 	}
 
 	dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping);
-	dma_unmap_len_set(tx_cb_ptr, dma_len, skb->len);
+	dma_unmap_len_set(tx_cb_ptr, dma_len, skb_len);
 	length_status = (skb_len << DMA_BUFLENGTH_SHIFT) | dma_desc_flags |
 			(priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT) |
 			DMA_TX_APPEND_CRC;

From 8178211eb7948b40b1f730e2d0b9b0a7a2ed62d1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 Mar 2016 17:23:36 -0700
Subject: [PATCH 373/797] bonding: fix bond_get_stats()

[ Upstream commit fe30937b65354c7fec244caebbdaae68e28ca797 ]

bond_get_stats() can be called from rtnetlink (with RTNL held)
or from the /proc/net/dev seq handler (with RCU held).

The logic added in commit 5f0c5f73e5ef ("bonding: make global bonding
stats more reliable") kind of assumed only one cpu could run there.

If multiple threads are reading /proc/net/dev, stats can be really
messed up after a while.

A second problem is that some fields are 32bit, so we need to properly
handle the wrap around problem.

Given that RTNL is not always held, we need to use
bond_for_each_slave_rcu().

Fixes: 5f0c5f73e5ef ("bonding: make global bonding stats more reliable")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c | 63 ++++++++++++++++++---------------
 include/net/bonding.h           |  1 +
 2 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 28bbca0af238..b3d70a7a5262 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3260,6 +3260,30 @@ static int bond_close(struct net_device *bond_dev)
 	return 0;
 }
 
+/* fold stats, assuming all rtnl_link_stats64 fields are u64, but
+ * that some drivers can provide 32bit values only.
+ */
+static void bond_fold_stats(struct rtnl_link_stats64 *_res,
+			    const struct rtnl_link_stats64 *_new,
+			    const struct rtnl_link_stats64 *_old)
+{
+	const u64 *new = (const u64 *)_new;
+	const u64 *old = (const u64 *)_old;
+	u64 *res = (u64 *)_res;
+	int i;
+
+	for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) {
+		u64 nv = new[i];
+		u64 ov = old[i];
+
+		/* detects if this particular field is 32bit only */
+		if (((nv | ov) >> 32) == 0)
+			res[i] += (u32)nv - (u32)ov;
+		else
+			res[i] += nv - ov;
+	}
+}
+
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 						struct rtnl_link_stats64 *stats)
 {
@@ -3268,43 +3292,23 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 	struct list_head *iter;
 	struct slave *slave;
 
+	spin_lock(&bond->stats_lock);
 	memcpy(stats, &bond->bond_stats, sizeof(*stats));
 
-	bond_for_each_slave(bond, slave, iter) {
-		const struct rtnl_link_stats64 *sstats =
+	rcu_read_lock();
+	bond_for_each_slave_rcu(bond, slave, iter) {
+		const struct rtnl_link_stats64 *new =
 			dev_get_stats(slave->dev, &temp);
-		struct rtnl_link_stats64 *pstats = &slave->slave_stats;
 
-		stats->rx_packets +=  sstats->rx_packets - pstats->rx_packets;
-		stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes;
-		stats->rx_errors += sstats->rx_errors - pstats->rx_errors;
-		stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped;
-
-		stats->tx_packets += sstats->tx_packets - pstats->tx_packets;;
-		stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes;
-		stats->tx_errors += sstats->tx_errors - pstats->tx_errors;
-		stats->tx_dropped += sstats->tx_dropped - pstats->tx_dropped;
-
-		stats->multicast += sstats->multicast - pstats->multicast;
-		stats->collisions += sstats->collisions - pstats->collisions;
-
-		stats->rx_length_errors += sstats->rx_length_errors - pstats->rx_length_errors;
-		stats->rx_over_errors += sstats->rx_over_errors - pstats->rx_over_errors;
-		stats->rx_crc_errors += sstats->rx_crc_errors - pstats->rx_crc_errors;
-		stats->rx_frame_errors += sstats->rx_frame_errors - pstats->rx_frame_errors;
-		stats->rx_fifo_errors += sstats->rx_fifo_errors - pstats->rx_fifo_errors;
-		stats->rx_missed_errors += sstats->rx_missed_errors - pstats->rx_missed_errors;
-
-		stats->tx_aborted_errors += sstats->tx_aborted_errors - pstats->tx_aborted_errors;
-		stats->tx_carrier_errors += sstats->tx_carrier_errors - pstats->tx_carrier_errors;
-		stats->tx_fifo_errors += sstats->tx_fifo_errors - pstats->tx_fifo_errors;
-		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors - pstats->tx_heartbeat_errors;
-		stats->tx_window_errors += sstats->tx_window_errors - pstats->tx_window_errors;
+		bond_fold_stats(stats, new, &slave->slave_stats);
 
 		/* save off the slave stats for the next run */
-		memcpy(pstats, sstats, sizeof(*sstats));
+		memcpy(&slave->slave_stats, new, sizeof(*new));
 	}
+	rcu_read_unlock();
+
 	memcpy(&bond->bond_stats, stats, sizeof(*stats));
+	spin_unlock(&bond->stats_lock);
 
 	return stats;
 }
@@ -4118,6 +4122,7 @@ void bond_setup(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 
 	spin_lock_init(&bond->mode_lock);
+	spin_lock_init(&bond->stats_lock);
 	bond->params = bonding_defaults;
 
 	/* Initialize pointers */
diff --git a/include/net/bonding.h b/include/net/bonding.h
index c1740a2794a3..93abe5f6188d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -214,6 +214,7 @@ struct bonding {
 	 * ALB mode (6) - to sync the use and modifications of its hash table
 	 */
 	spinlock_t mode_lock;
+	spinlock_t stats_lock;
 	u8	 send_peer_notif;
 	u8       igmp_retrans;
 #ifdef CONFIG_PROC_FS

From 2ddb181390475f4902406baa008c220f39aeaa69 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 22 Mar 2016 09:19:38 +0100
Subject: [PATCH 374/797] ipv4: fix broadcast packets reception

[ Upstream commit ad0ea1989cc4d5905941d0a9e62c63ad6d859cef ]

Currently, ingress ipv4 broadcast datagrams are dropped since,
in udp_v4_early_demux(), ip_check_mc_rcu() is invoked even on
bcast packets.

This patch addresses the issue, invoking ip_check_mc_rcu()
only for mcast packets.

Fixes: 6e5403093261 ("ipv4/udp: Verify multicast group is ours in upd_v4_early_demux()")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/udp.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7f8ab46adf61..21fbb54f11d0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1990,10 +1990,14 @@ void udp_v4_early_demux(struct sk_buff *skb)
 		if (!in_dev)
 			return;
 
-		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
-				       iph->protocol);
-		if (!ours)
-			return;
+		/* we are supposed to accept bcast packets */
+		if (skb->pkt_type == PACKET_MULTICAST) {
+			ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+					       iph->protocol);
+			if (!ours)
+				return;
+		}
+
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 						   uh->source, iph->saddr, dif);
 	} else if (skb->pkt_type == PACKET_HOST) {

From 80de2e4115130a392dd528fe023a2508c15617a4 Mon Sep 17 00:00:00 2001
From: Lance Richardson <lrichard@redhat.com>
Date: Tue, 22 Mar 2016 14:56:57 -0400
Subject: [PATCH 375/797] ipv4: initialize flowi4_flags before calling
 fib_lookup()

[ Upstream commit 4cfc86f3dae6ca38ed49cdd78f458a03d4d87992 ]

Field fl4.flowi4_flags is not initialized in fib_compute_spec_dst()
before calling fib_lookup(), which means fib_table_lookup() is
using non-deterministic data at this line:

	if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {

Fix by initializing the entire fl4 structure, which will prevent
similar issues as fields are added in the future by ensuring that
all fields are initialized to zero unless explicitly initialized
to another value.

Fixes: 58189ca7b2741 ("net: Fix vti use case with oif in dst lookups")
Suggested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Lance Richardson <lrichard@redhat.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_frontend.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 21add552e56a..8a9246deccfe 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 	struct in_device *in_dev;
 	struct fib_result res;
 	struct rtable *rt;
-	struct flowi4 fl4;
 	struct net *net;
 	int scope;
 
@@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 
 	scope = RT_SCOPE_UNIVERSE;
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
-		fl4.flowi4_oif = 0;
-		fl4.flowi4_iif = LOOPBACK_IFINDEX;
-		fl4.daddr = ip_hdr(skb)->saddr;
-		fl4.saddr = 0;
-		fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
-		fl4.flowi4_scope = scope;
-		fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
-		fl4.flowi4_tun_key.tun_id = 0;
+		struct flowi4 fl4 = {
+			.flowi4_iif = LOOPBACK_IFINDEX,
+			.daddr = ip_hdr(skb)->saddr,
+			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+			.flowi4_scope = scope,
+			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
+		};
 		if (!fib_lookup(net, &fl4, &res, 0))
 			return FIB_RES_PREFSRC(net, res);
 	} else {

From 046ea8180ecaf5d8b5823e17714a09526ad7d321 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 23 Mar 2016 16:38:55 +0100
Subject: [PATCH 376/797] ppp: take reference on channels netns

[ Upstream commit 1f461dcdd296eecedaffffc6bae2bfa90bd7eb89 ]

Let channels hold a reference on their network namespace.
Some channel types, like ppp_async and ppp_synctty, can have their
userspace controller running in a different namespace. Therefore they
can't rely on that controller to keep their netns from being removed
from under them.

==================================================================
BUG: KASAN: use-after-free in ppp_unregister_channel+0x372/0x3a0 at
addr ffff880064e217e0
Read of size 8 by task syz-executor/11581
=============================================================================
BUG net_namespace (Not tainted): kasan: bad access detected
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in copy_net_ns+0x6b/0x1a0 age=92569 cpu=3 pid=6906
[<      none      >] ___slab_alloc+0x4c7/0x500 kernel/mm/slub.c:2440
[<      none      >] __slab_alloc+0x4c/0x90 kernel/mm/slub.c:2469
[<     inline     >] slab_alloc_node kernel/mm/slub.c:2532
[<     inline     >] slab_alloc kernel/mm/slub.c:2574
[<      none      >] kmem_cache_alloc+0x23a/0x2b0 kernel/mm/slub.c:2579
[<     inline     >] kmem_cache_zalloc kernel/include/linux/slab.h:597
[<     inline     >] net_alloc kernel/net/core/net_namespace.c:325
[<      none      >] copy_net_ns+0x6b/0x1a0 kernel/net/core/net_namespace.c:360
[<      none      >] create_new_namespaces+0x2f6/0x610 kernel/kernel/nsproxy.c:95
[<      none      >] copy_namespaces+0x297/0x320 kernel/kernel/nsproxy.c:150
[<      none      >] copy_process.part.35+0x1bf4/0x5760 kernel/kernel/fork.c:1451
[<     inline     >] copy_process kernel/kernel/fork.c:1274
[<      none      >] _do_fork+0x1bc/0xcb0 kernel/kernel/fork.c:1723
[<     inline     >] SYSC_clone kernel/kernel/fork.c:1832
[<      none      >] SyS_clone+0x37/0x50 kernel/kernel/fork.c:1826
[<      none      >] entry_SYSCALL_64_fastpath+0x16/0x7a kernel/arch/x86/entry/entry_64.S:185

INFO: Freed in net_drop_ns+0x67/0x80 age=575 cpu=2 pid=2631
[<      none      >] __slab_free+0x1fc/0x320 kernel/mm/slub.c:2650
[<     inline     >] slab_free kernel/mm/slub.c:2805
[<      none      >] kmem_cache_free+0x2a0/0x330 kernel/mm/slub.c:2814
[<     inline     >] net_free kernel/net/core/net_namespace.c:341
[<      none      >] net_drop_ns+0x67/0x80 kernel/net/core/net_namespace.c:348
[<      none      >] cleanup_net+0x4e5/0x600 kernel/net/core/net_namespace.c:448
[<      none      >] process_one_work+0x794/0x1440 kernel/kernel/workqueue.c:2036
[<      none      >] worker_thread+0xdb/0xfc0 kernel/kernel/workqueue.c:2170
[<      none      >] kthread+0x23f/0x2d0 kernel/drivers/block/aoe/aoecmd.c:1303
[<      none      >] ret_from_fork+0x3f/0x70 kernel/arch/x86/entry/entry_64.S:468
INFO: Slab 0xffffea0001938800 objects=3 used=0 fp=0xffff880064e20000
flags=0x5fffc0000004080
INFO: Object 0xffff880064e20000 @offset=0 fp=0xffff880064e24200

CPU: 1 PID: 11581 Comm: syz-executor Tainted: G    B           4.4.0+
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
 00000000ffffffff ffff8800662c7790 ffffffff8292049d ffff88003e36a300
 ffff880064e20000 ffff880064e20000 ffff8800662c77c0 ffffffff816f2054
 ffff88003e36a300 ffffea0001938800 ffff880064e20000 0000000000000000
Call Trace:
 [<     inline     >] __dump_stack kernel/lib/dump_stack.c:15
 [<ffffffff8292049d>] dump_stack+0x6f/0xa2 kernel/lib/dump_stack.c:50
 [<ffffffff816f2054>] print_trailer+0xf4/0x150 kernel/mm/slub.c:654
 [<ffffffff816f875f>] object_err+0x2f/0x40 kernel/mm/slub.c:661
 [<     inline     >] print_address_description kernel/mm/kasan/report.c:138
 [<ffffffff816fb0c5>] kasan_report_error+0x215/0x530 kernel/mm/kasan/report.c:236
 [<     inline     >] kasan_report kernel/mm/kasan/report.c:259
 [<ffffffff816fb4de>] __asan_report_load8_noabort+0x3e/0x40 kernel/mm/kasan/report.c:280
 [<     inline     >] ? ppp_pernet kernel/include/linux/compiler.h:218
 [<ffffffff83ad71b2>] ? ppp_unregister_channel+0x372/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392
 [<     inline     >] ppp_pernet kernel/include/linux/compiler.h:218
 [<ffffffff83ad71b2>] ppp_unregister_channel+0x372/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392
 [<     inline     >] ? ppp_pernet kernel/drivers/net/ppp/ppp_generic.c:293
 [<ffffffff83ad6f26>] ? ppp_unregister_channel+0xe6/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392
 [<ffffffff83ae18f3>] ppp_asynctty_close+0xa3/0x130 kernel/drivers/net/ppp/ppp_async.c:241
 [<ffffffff83ae1850>] ? async_lcp_peek+0x5b0/0x5b0 kernel/drivers/net/ppp/ppp_async.c:1000
 [<ffffffff82c33239>] tty_ldisc_close.isra.1+0x99/0xe0 kernel/drivers/tty/tty_ldisc.c:478
 [<ffffffff82c332c0>] tty_ldisc_kill+0x40/0x170 kernel/drivers/tty/tty_ldisc.c:744
 [<ffffffff82c34943>] tty_ldisc_release+0x1b3/0x260 kernel/drivers/tty/tty_ldisc.c:772
 [<ffffffff82c1ef21>] tty_release+0xac1/0x13e0 kernel/drivers/tty/tty_io.c:1901
 [<ffffffff82c1e460>] ? release_tty+0x320/0x320 kernel/drivers/tty/tty_io.c:1688
 [<ffffffff8174de36>] __fput+0x236/0x780 kernel/fs/file_table.c:208
 [<ffffffff8174e405>] ____fput+0x15/0x20 kernel/fs/file_table.c:244
 [<ffffffff813595ab>] task_work_run+0x16b/0x200 kernel/kernel/task_work.c:115
 [<     inline     >] exit_task_work kernel/include/linux/task_work.h:21
 [<ffffffff81307105>] do_exit+0x8b5/0x2c60 kernel/kernel/exit.c:750
 [<ffffffff813fdd20>] ? debug_check_no_locks_freed+0x290/0x290 kernel/kernel/locking/lockdep.c:4123
 [<ffffffff81306850>] ? mm_update_next_owner+0x6f0/0x6f0 kernel/kernel/exit.c:357
 [<ffffffff813215e6>] ? __dequeue_signal+0x136/0x470 kernel/kernel/signal.c:550
 [<ffffffff8132067b>] ? recalc_sigpending_tsk+0x13b/0x180 kernel/kernel/signal.c:145
 [<ffffffff81309628>] do_group_exit+0x108/0x330 kernel/kernel/exit.c:880
 [<ffffffff8132b9d4>] get_signal+0x5e4/0x14f0 kernel/kernel/signal.c:2307
 [<     inline     >] ? kretprobe_table_lock kernel/kernel/kprobes.c:1113
 [<ffffffff8151d355>] ? kprobe_flush_task+0xb5/0x450 kernel/kernel/kprobes.c:1158
 [<ffffffff8115f7d3>] do_signal+0x83/0x1c90 kernel/arch/x86/kernel/signal.c:712
 [<ffffffff8151d2a0>] ? recycle_rp_inst+0x310/0x310 kernel/include/linux/list.h:655
 [<ffffffff8115f750>] ? setup_sigcontext+0x780/0x780 kernel/arch/x86/kernel/signal.c:165
 [<ffffffff81380864>] ? finish_task_switch+0x424/0x5f0 kernel/kernel/sched/core.c:2692
 [<     inline     >] ? finish_lock_switch kernel/kernel/sched/sched.h:1099
 [<ffffffff81380560>] ? finish_task_switch+0x120/0x5f0 kernel/kernel/sched/core.c:2678
 [<     inline     >] ? context_switch kernel/kernel/sched/core.c:2807
 [<ffffffff85d794e9>] ? __schedule+0x919/0x1bd0 kernel/kernel/sched/core.c:3283
 [<ffffffff81003901>] exit_to_usermode_loop+0xf1/0x1a0 kernel/arch/x86/entry/common.c:247
 [<     inline     >] prepare_exit_to_usermode kernel/arch/x86/entry/common.c:282
 [<ffffffff810062ef>] syscall_return_slowpath+0x19f/0x210 kernel/arch/x86/entry/common.c:344
 [<ffffffff85d88022>] int_ret_from_sys_call+0x25/0x9f kernel/arch/x86/entry/entry_64.S:281
Memory state around the buggy address:
 ffff880064e21680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff880064e21700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff880064e21780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                       ^
 ffff880064e21800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff880064e21880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================

Fixes: 273ec51dd7ce ("net: ppp_generic - introduce net-namespace functionality v2")
Reported-by: Baozeng Ding <sploving1@gmail.com>
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ppp/ppp_generic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 35e8b5a6fd93..174e06ec7c2f 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2293,7 +2293,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
 
 	pch->ppp = NULL;
 	pch->chan = chan;
-	pch->chan_net = net;
+	pch->chan_net = get_net(net);
 	chan->ppp = pch;
 	init_ppp_file(&pch->file, CHANNEL);
 	pch->file.hdrlen = chan->hdrlen;
@@ -2390,6 +2390,8 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	spin_lock_bh(&pn->all_channels_lock);
 	list_del(&pch->list);
 	spin_unlock_bh(&pn->all_channels_lock);
+	put_net(pch->chan_net);
+	pch->chan_net = NULL;
 
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);

From 759e8f3896d4e7b6d8f374216ab3ae8191e22213 Mon Sep 17 00:00:00 2001
From: "subashab@codeaurora.org" <subashab@codeaurora.org>
Date: Wed, 23 Mar 2016 22:39:50 -0600
Subject: [PATCH 377/797] xfrm: Fix crash observed during device unregistration
 and decryption

[ Upstream commit 071d36bf21bcc837be00cea55bcef8d129e7f609 ]

A crash is observed when a decrypted packet is processed in the receive
path. get_rps_cpu() tries to dereference the skb->dev fields but,
judging from the poison pattern, the device appears to have been freed.

[<ffffffc000af58ec>] get_rps_cpu+0x94/0x2f0
[<ffffffc000af5f94>] netif_rx_internal+0x140/0x1cc
[<ffffffc000af6094>] netif_rx+0x74/0x94
[<ffffffc000bc0b6c>] xfrm_input+0x754/0x7d0
[<ffffffc000bc0bf8>] xfrm_input_resume+0x10/0x1c
[<ffffffc000ba6eb8>] esp_input_done+0x20/0x30
[<ffffffc0000b64c8>] process_one_work+0x244/0x3fc
[<ffffffc0000b7324>] worker_thread+0x2f8/0x418
[<ffffffc0000bb40c>] kthread+0xe0/0xec

-013|get_rps_cpu(
     |    dev = 0xFFFFFFC08B688000,
     |    skb = 0xFFFFFFC0C76AAC00 -> (
     |      dev = 0xFFFFFFC08B688000 -> (
     |        name =
"......................................................
     |        name_hlist = (next = 0xAAAAAAAAAAAAAAAA, pprev =
0xAAAAAAAAAAA

The following sequence of events was observed:

- Encrypted packet in receive path from netdevice is queued
- Encrypted packet queued for decryption (asynchronous)
- Netdevice brought down and freed
- Packet is decrypted and returned through callback in esp_input_done
- Packet is queued again for process in network stack using netif_rx

Since the device appears to have been freed, the dereference of
skb->dev in get_rps_cpu() leads to an unhandled page fault
exception.

Fix this by holding a reference on the device when queueing packets
asynchronously and releasing that reference when the callback returns.

v2: Make the change generic to xfrm as mentioned by Steffen and
update the title to xfrm

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jerome Stanislaus <jeromes@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/xfrm/xfrm_input.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ad7f5b3f9b61..1c4ad477ce93 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
 
 		skb_dst_force(skb);
+		dev_hold(skb->dev);
 
 		nexthdr = x->type->input(x, skb);
 
 		if (nexthdr == -EINPROGRESS)
 			return 0;
 resume:
+		dev_put(skb->dev);
+
 		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG) {

From 9603d0a58d3069a1ec9fc94090d470ae520118f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Mon, 28 Mar 2016 22:38:16 +0200
Subject: [PATCH 378/797] qmi_wwan: add "D-Link DWM-221 B1" device id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit e84810c7b85a2d7897797b3ad3e879168a8e032a ]

Thomas reports:
"Windows:

00 diagnostics
01 modem
02 at-port
03 nmea
04 nic

Linux:

T:  Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#=  4 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=2001 ProdID=7e19 Rev=02.32
S:  Manufacturer=Mobile Connect
S:  Product=Mobile Connect
S:  SerialNumber=0123456789ABCDEF
C:  #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
I:  If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage"

Reported-by: Thomas Schäfer <tschaefer@t-online.de>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index df77467c7e93..a34f491224c1 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -699,6 +699,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x19d2, 0x1426, 2)},	/* ZTE MF91 */
 	{QMI_FIXED_INTF(0x19d2, 0x1428, 2)},	/* Telewell TW-LTE 4G v2 */
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
+	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
 	{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)},    /* Sierra Wireless MC7700 */
 	{QMI_FIXED_INTF(0x114f, 0x68a2, 8)},    /* Sierra Wireless MC7750 */
 	{QMI_FIXED_INTF(0x1199, 0x68a2, 8)},	/* Sierra Wireless MC7710 in QMI mode */

From 26dd42ebff94ff481af56704cd1b4dd32ca8579f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 29 Mar 2016 08:43:41 -0700
Subject: [PATCH 379/797] ipv6: udp: fix UDP_MIB_IGNOREDMULTI updates

[ Upstream commit 2d4212261fdf13e29728ddb5ea9d60c342cc92b5 ]

IPv6 counters updates use a different macro than IPv4.

Fixes: 36cbb2452cbaf ("udp: Increment UDP_MIB_IGNOREDMULTI for arriving unmatched multicasts")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Rick Jones <rick.jones2@hp.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/udp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e293a552693..6665e1a0bfe1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -837,8 +837,8 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		flush_stack(stack, count, skb, count - 1);
 	} else {
 		if (!inner_flushed)
-			UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
-					 proto == IPPROTO_UDPLITE);
+			UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+					  proto == IPPROTO_UDPLITE);
 		consume_skb(skb);
 	}
 	return 0;

From df371b19630cf045e00f1b09a721bb7103266796 Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Tue, 29 Mar 2016 18:48:08 +0800
Subject: [PATCH 380/797] bridge: Allow set bridge ageing time when switchdev
 disabled

[ Upstream commit 5e263f712691615fb802f06c98d7638c378f5d11 ]

When NET_SWITCHDEV=n, switchdev_port_attr_set() returns -EOPNOTSUPP.
We should ignore this error code and continue to set the ageing time.

Fixes: c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev")
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Acked-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/br_stp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 0e658f47a5da..eff69cb270d2 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -586,7 +586,7 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
 	int err;
 
 	err = switchdev_port_attr_set(br->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		return err;
 
 	br->ageing_time = t;

From 18baf0e01eefd27156d8a8fc5ade1ad8930a1b3d Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 31 Mar 2016 18:10:31 +0200
Subject: [PATCH 381/797] rtnl: fix msg size calculation in if_nlmsg_size()

[ Upstream commit c57c7a95da842807b475b823ed2e5435c42cb3b0 ]

Size of the attribute IFLA_PHYS_PORT_NAME was missing.

Fixes: db24a9044ee1 ("net: add support for phys_port_name")
CC: David Ahern <dsahern@gmail.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/rtnetlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 34ba7a08876d..ca966f7de351 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -905,6 +905,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
 
 }

From e137eeb38d2431ded3ec1aff84183258f1dd4162 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 31 Mar 2016 02:13:18 +0200
Subject: [PATCH 382/797] tun, bpf: fix suspicious RCU usage in tun_{attach,
 detach}_filter

[ Upstream commit 5a5abb1fa3b05dd6aa821525832644c1e7d2905f ]

Sasha Levin reported a suspicious rcu_dereference_protected() warning
found while fuzzing with trinity that is similar to this one:

  [   52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage!
  [   52.765688] other info that might help us debug this:
  [   52.765695] rcu_scheduler_active = 1, debug_locks = 1
  [   52.765701] 1 lock held by a.out/1525:
  [   52.765704]  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff816a64b7>] rtnl_lock+0x17/0x20
  [   52.765721] stack backtrace:
  [   52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264
  [...]
  [   52.765768] Call Trace:
  [   52.765775]  [<ffffffff813e488d>] dump_stack+0x85/0xc8
  [   52.765784]  [<ffffffff810f2fa5>] lockdep_rcu_suspicious+0xd5/0x110
  [   52.765792]  [<ffffffff816afdc2>] sk_detach_filter+0x82/0x90
  [   52.765801]  [<ffffffffa0883425>] tun_detach_filter+0x35/0x90 [tun]
  [   52.765810]  [<ffffffffa0884ed4>] __tun_chr_ioctl+0x354/0x1130 [tun]
  [   52.765818]  [<ffffffff8136fed0>] ? selinux_file_ioctl+0x130/0x210
  [   52.765827]  [<ffffffffa0885ce3>] tun_chr_ioctl+0x13/0x20 [tun]
  [   52.765834]  [<ffffffff81260ea6>] do_vfs_ioctl+0x96/0x690
  [   52.765843]  [<ffffffff81364af3>] ? security_file_ioctl+0x43/0x60
  [   52.765850]  [<ffffffff81261519>] SyS_ioctl+0x79/0x90
  [   52.765858]  [<ffffffff81003ba2>] do_syscall_64+0x62/0x140
  [   52.765866]  [<ffffffff817d563f>] entry_SYSCALL64_slow_path+0x25/0x25

Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled
from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH,
DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices.

Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu
fixes") sk_attach_filter()/sk_detach_filter() now dereferences the
filter with rcu_dereference_protected(), checking whether socket lock
is held in control path.

Since its introduction in 994051625981 ("tun: socket filter support"),
tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the
sock_owned_by_user(sk) doesn't apply in this specific case and therefore
triggers the false positive.

Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair
that is used by tap filters and pass in lockdep_rtnl_is_held() for the
rcu_dereference_protected() checks instead.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/tun.c      |  8 +++++---
 include/linux/filter.h |  4 ++++
 net/core/filter.c      | 33 +++++++++++++++++++++------------
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f0db770e8b2f..9bc7b0c7d471 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -621,7 +621,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
 
 	/* Re-attach the filter to persist device */
 	if (!skip_filter && (tun->filter_attached == true)) {
-		err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+		err = __sk_attach_filter(&tun->fprog, tfile->socket.sk,
+					 lockdep_rtnl_is_held());
 		if (!err)
 			goto out;
 	}
@@ -1804,7 +1805,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n)
 
 	for (i = 0; i < n; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
-		sk_detach_filter(tfile->socket.sk);
+		__sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held());
 	}
 
 	tun->filter_attached = false;
@@ -1817,7 +1818,8 @@ static int tun_attach_filter(struct tun_struct *tun)
 
 	for (i = 0; i < tun->numqueues; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
-		ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+		ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk,
+					 lockdep_rtnl_is_held());
 		if (ret) {
 			tun_detach_filter(tun, i);
 			return ret;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5972ffe5719a..5110d4211866 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -446,8 +446,12 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
 void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
+		       bool locked);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
+int __sk_detach_filter(struct sock *sk, bool locked);
+
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 37157c4c1a78..f393a22b9d50 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1139,7 +1139,8 @@ void bpf_prog_destroy(struct bpf_prog *fp)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_destroy);
 
-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
+			    bool locked)
 {
 	struct sk_filter *fp, *old_fp;
 
@@ -1155,10 +1156,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
 		return -ENOMEM;
 	}
 
-	old_fp = rcu_dereference_protected(sk->sk_filter,
-					   sock_owned_by_user(sk));
+	old_fp = rcu_dereference_protected(sk->sk_filter, locked);
 	rcu_assign_pointer(sk->sk_filter, fp);
-
 	if (old_fp)
 		sk_filter_uncharge(sk, old_fp);
 
@@ -1175,7 +1174,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
  * occurs or there is insufficient memory for the filter a negative
  * errno code is returned. On success the return is zero.
  */
-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
+		       bool locked)
 {
 	unsigned int fsize = bpf_classic_proglen(fprog);
 	unsigned int bpf_fsize = bpf_prog_size(fprog->len);
@@ -1213,7 +1213,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __sk_attach_prog(prog, sk);
+	err = __sk_attach_prog(prog, sk, locked);
 	if (err < 0) {
 		__bpf_prog_release(prog);
 		return err;
@@ -1221,7 +1221,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(sk_attach_filter);
+EXPORT_SYMBOL_GPL(__sk_attach_filter);
+
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+	return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
+}
 
 int sk_attach_bpf(u32 ufd, struct sock *sk)
 {
@@ -1240,7 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 		return -EINVAL;
 	}
 
-	err = __sk_attach_prog(prog, sk);
+	err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
 	if (err < 0) {
 		bpf_prog_put(prog);
 		return err;
@@ -1913,7 +1918,7 @@ static int __init register_sk_filter_ops(void)
 }
 late_initcall(register_sk_filter_ops);
 
-int sk_detach_filter(struct sock *sk)
+int __sk_detach_filter(struct sock *sk, bool locked)
 {
 	int ret = -ENOENT;
 	struct sk_filter *filter;
@@ -1921,8 +1926,7 @@ int sk_detach_filter(struct sock *sk)
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return -EPERM;
 
-	filter = rcu_dereference_protected(sk->sk_filter,
-					   sock_owned_by_user(sk));
+	filter = rcu_dereference_protected(sk->sk_filter, locked);
 	if (filter) {
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
 		sk_filter_uncharge(sk, filter);
@@ -1931,7 +1935,12 @@ int sk_detach_filter(struct sock *sk)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(sk_detach_filter);
+EXPORT_SYMBOL_GPL(__sk_detach_filter);
+
+int sk_detach_filter(struct sock *sk)
+{
+	return __sk_detach_filter(sk, sock_owned_by_user(sk));
+}
 
 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 		  unsigned int len)

From 9daaadbe7ba903615811fdad3e50150eef8e222e Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 8 Apr 2016 13:26:48 +0800
Subject: [PATCH 383/797] tuntap: restore default qdisc

[ Upstream commit 016adb7260f481168c03e09f785184d6d5278894 ]

After commit f84bb1eac027 ("net: fix IFF_NO_QUEUE for drivers using
alloc_netdev"), default qdisc was changed to noqueue because
tuntap does not set tx_queue_len during .setup(). This patch restores
default qdisc by setting tx_queue_len in tun_setup().

Fixes: f84bb1eac027 ("net: fix IFF_NO_QUEUE for drivers using alloc_netdev")
Cc: Phil Sutter <phil@nwl.cc>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/tun.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9bc7b0c7d471..2d186bd66d43 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1001,7 +1001,6 @@ static void tun_net_init(struct net_device *dev)
 		/* Zero header length */
 		dev->type = ARPHRD_NONE;
 		dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
-		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 		break;
 
 	case IFF_TAP:
@@ -1013,7 +1012,6 @@ static void tun_net_init(struct net_device *dev)
 
 		eth_hw_addr_random(dev);
 
-		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 		break;
 	}
 }
@@ -1464,6 +1462,8 @@ static void tun_setup(struct net_device *dev)
 
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = tun_free_netdev;
+	/* We prefer our own queue length */
+	dev->tx_queue_len = TUN_READQ_SIZE;
 }
 
 /* Trivial set of netlink ops to allow deleting tun or tap

From ad730152036610d28f6f47326393aae3044e4d2f Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Sun, 3 Apr 2016 22:09:23 +0800
Subject: [PATCH 384/797] ipv4: l2tp: fix a potential issue in l2tp_ip_recv

[ Upstream commit 5745b8232e942abd5e16e85fa9b27cc21324acf0 ]

pskb_may_pull() can change skb->data, so we have to load ptr/optr at the
right place.

Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_ip.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ec22078b0914..42de4ccd159f 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	struct l2tp_tunnel *tunnel = NULL;
 	int length;
 
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
 	if (!pskb_may_pull(skb, 4))
 		goto discard;
 
+	/* Point to L2TP header */
+	optr = ptr = skb->data;
 	session_id = ntohl(*((__be32 *) ptr));
 	ptr += 4;
 
@@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 		if (!pskb_may_pull(skb, length))
 			goto discard;
 
+		/* Point to L2TP header */
+		optr = ptr = skb->data;
+		ptr += 4;
 		pr_debug("%s: ip recv\n", tunnel->name);
 		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
 	}

From 9bd8af9979f543fb605fbfb79e0e8ceeffb88c9c Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Sun, 3 Apr 2016 22:09:24 +0800
Subject: [PATCH 385/797] ipv6: l2tp: fix a potential issue in l2tp_ip6_recv

[ Upstream commit be447f305494e019dfc37ea4cdf3b0e4200b4eba ]

pskb_may_pull() can change skb->data, so we have to load ptr/optr at the
right place.

Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_ip6.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a2c8747d2936..9ee4ddb6b397 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 	struct l2tp_tunnel *tunnel = NULL;
 	int length;
 
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
 	if (!pskb_may_pull(skb, 4))
 		goto discard;
 
+	/* Point to L2TP header */
+	optr = ptr = skb->data;
 	session_id = ntohl(*((__be32 *) ptr));
 	ptr += 4;
 
@@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 		if (!pskb_may_pull(skb, length))
 			goto discard;
 
+		/* Point to L2TP header */
+		optr = ptr = skb->data;
+		ptr += 4;
 		pr_debug("%s: ip recv\n", tunnel->name);
 		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
 	}

From 4f4de9ab660c0dca4030b74613d8ac3cea5747c9 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Date: Fri, 1 Apr 2016 17:17:50 -0300
Subject: [PATCH 386/797] ip6_tunnel: set rtnl_link_ops before calling
 register_netdevice

[ Upstream commit b6ee376cb0b7fb4e7e07d6cd248bd40436fb9ba6 ]

When creating an ip6tnl tunnel with ip tunnel, rtnl_link_ops is not set
before ip6_tnl_create2() calls register_netdevice(). As a result, the
NEWLINK message sent at registration has no linkinfo attribute.

Setting rtnl_link_ops before calling register_netdevice fixes that.

Fixes: 0b112457229d ("ip6tnl: add support of link creation via rtnl")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6c5dfec7a377..3991b21e24ad 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -343,12 +343,12 @@ static int ip6_tnl_create2(struct net_device *dev)
 
 	t = netdev_priv(dev);
 
+	dev->rtnl_link_ops = &ip6_link_ops;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out;
 
 	strcpy(t->parms.name, dev->name);
-	dev->rtnl_link_ops = &ip6_link_ops;
 
 	dev_hold(dev);
 	ip6_tnl_link(ip6n, t);

From 5598928f39fc1ffd86b43444c50b378fd08a449e Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Tue, 5 Apr 2016 18:41:08 +0200
Subject: [PATCH 387/797] ipv6: Count in extension headers in
 skb->network_header

[ Upstream commit 3ba3458fb9c050718b95275a3310b74415e767e2 ]

When sending a UDPv6 message longer than MTU, account for the length
of fragmentable IPv6 extension headers in skb->network_header offset.
Same as we do in alloc_new_skb path in __ip6_append_data().

This ensures that later on __ip6_make_skb() will make space in
headroom for fragmentable extension headers:

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));

Prevents a splat due to skb_under_panic:

skbuff: skb_under_panic: text:ffffffff8143397b len:2126 put:14 \
head:ffff880005bacf50 data:ffff880005bacf4a tail:0x48 end:0xc0 dev:lo
------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:104!
invalid opcode: 0000 [#1] KASAN
CPU: 0 PID: 160 Comm: reproducer Not tainted 4.6.0-rc2 #65
[...]
Call Trace:
 [<ffffffff813eb7b9>] skb_push+0x79/0x80
 [<ffffffff8143397b>] eth_header+0x2b/0x100
 [<ffffffff8141e0d0>] neigh_resolve_output+0x210/0x310
 [<ffffffff814eab77>] ip6_finish_output2+0x4a7/0x7c0
 [<ffffffff814efe3a>] ip6_output+0x16a/0x280
 [<ffffffff815440c1>] ip6_local_out+0xb1/0xf0
 [<ffffffff814f1115>] ip6_send_skb+0x45/0xd0
 [<ffffffff81518836>] udp_v6_send_skb+0x246/0x5d0
 [<ffffffff8151985e>] udpv6_sendmsg+0xa6e/0x1090
[...]

Reported-by: Ji Jianwen <jiji@redhat.com>
Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_output.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 31144c486c52..a175152d3e46 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1091,8 +1091,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
 			int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
-			int transhdrlen, int mtu, unsigned int flags,
-			const struct flowi6 *fl6)
+			int exthdrlen, int transhdrlen, int mtu,
+			unsigned int flags, const struct flowi6 *fl6)
 
 {
 	struct sk_buff *skb;
@@ -1117,7 +1117,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb_put(skb, fragheaderlen + transhdrlen);
 
 		/* initialize network header pointer */
-		skb_reset_network_header(skb);
+		skb_set_network_header(skb, exthdrlen);
 
 		/* initialize protocol header pointer */
 		skb->transport_header = skb->network_header + fragheaderlen;
@@ -1359,7 +1359,7 @@ static int __ip6_append_data(struct sock *sk,
 	    (rt->dst.dev->features & NETIF_F_UFO) &&
 	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
-					  hh_len, fragheaderlen,
+					  hh_len, fragheaderlen, exthdrlen,
 					  transhdrlen, mtu, flags, fl6);
 		if (err)
 			goto error;

From c1ea2d028ffb5b2aeaf183ac3207992f168c51a2 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 7 Apr 2016 21:28:38 -0700
Subject: [PATCH 388/797] mpls: find_outdev: check for err ptr in addition to
 NULL check

[ Upstream commit 94a57f1f8a9de90ab4b0f8748361ff8be706c80c ]

find_outdev calls inet{,6}_fib_lookup_dev() or dev_get_by_index() to
find the output device. In case of an error, inet{,6}_fib_lookup_dev()
returns error pointer and dev_get_by_index() returns NULL. But the function
only checks for NULL and thus can end up calling dev_put on an ERR_PTR.
This patch adds an additional check for err ptr after the NULL check.

Before: Trying to add an mpls route with no oif from user, no available
path to 10.1.1.8 and no default route:
$ip -f mpls route add 100 as 200 via inet 10.1.1.8
[  822.337195] BUG: unable to handle kernel NULL pointer dereference at
00000000000003a3
[  822.340033] IP: [<ffffffff8148781e>] mpls_nh_assign_dev+0x10b/0x182
[  822.340033] PGD 1db38067 PUD 1de9e067 PMD 0
[  822.340033] Oops: 0000 [#1] SMP
[  822.340033] Modules linked in:
[  822.340033] CPU: 0 PID: 11148 Comm: ip Not tainted 4.5.0-rc7+ #54
[  822.340033] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org
04/01/2014
[  822.340033] task: ffff88001db82580 ti: ffff88001dad4000 task.ti:
ffff88001dad4000
[  822.340033] RIP: 0010:[<ffffffff8148781e>]  [<ffffffff8148781e>]
mpls_nh_assign_dev+0x10b/0x182
[  822.340033] RSP: 0018:ffff88001dad7a88  EFLAGS: 00010282
[  822.340033] RAX: ffffffffffffff9b RBX: ffffffffffffff9b RCX:
0000000000000002
[  822.340033] RDX: 00000000ffffff9b RSI: 0000000000000008 RDI:
0000000000000000
[  822.340033] RBP: ffff88001ddc9ea0 R08: ffff88001e9f1768 R09:
0000000000000000
[  822.340033] R10: ffff88001d9c1100 R11: ffff88001e3c89f0 R12:
ffffffff8187e0c0
[  822.340033] R13: ffffffff8187e0c0 R14: ffff88001ddc9e80 R15:
0000000000000004
[  822.340033] FS:  00007ff9ed798700(0000) GS:ffff88001fc00000(0000)
knlGS:0000000000000000
[  822.340033] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  822.340033] CR2: 00000000000003a3 CR3: 000000001de89000 CR4:
00000000000006f0
[  822.340033] Stack:
[  822.340033]  0000000000000000 0000000100000000 0000000000000000
0000000000000000
[  822.340033]  0000000000000000 0801010a00000000 0000000000000000
0000000000000000
[  822.340033]  0000000000000004 ffffffff8148749b ffffffff8187e0c0
000000000000001c
[  822.340033] Call Trace:
[  822.340033]  [<ffffffff8148749b>] ? mpls_rt_alloc+0x2b/0x3e
[  822.340033]  [<ffffffff81488e66>] ? mpls_rtm_newroute+0x358/0x3e2
[  822.340033]  [<ffffffff810e7bbc>] ? get_page+0x5/0xa
[  822.340033]  [<ffffffff813b7d94>] ? rtnetlink_rcv_msg+0x17e/0x191
[  822.340033]  [<ffffffff8111794e>] ? __kmalloc_track_caller+0x8c/0x9e
[  822.340033]  [<ffffffff813c9393>] ?
rht_key_hashfn.isra.20.constprop.57+0x14/0x1f
[  822.340033]  [<ffffffff813b7c16>] ? __rtnl_unlock+0xc/0xc
[  822.340033]  [<ffffffff813cb794>] ? netlink_rcv_skb+0x36/0x82
[  822.340033]  [<ffffffff813b4507>] ? rtnetlink_rcv+0x1f/0x28
[  822.340033]  [<ffffffff813cb2b1>] ? netlink_unicast+0x106/0x189
[  822.340033]  [<ffffffff813cb5b3>] ? netlink_sendmsg+0x27f/0x2c8
[  822.340033]  [<ffffffff81392ede>] ? sock_sendmsg_nosec+0x10/0x1b
[  822.340033]  [<ffffffff81393df1>] ? ___sys_sendmsg+0x182/0x1e3
[  822.340033]  [<ffffffff810e4f35>] ?
__alloc_pages_nodemask+0x11c/0x1e4
[  822.340033]  [<ffffffff8110619c>] ? PageAnon+0x5/0xd
[  822.340033]  [<ffffffff811062fe>] ? __page_set_anon_rmap+0x45/0x52
[  822.340033]  [<ffffffff810e7bbc>] ? get_page+0x5/0xa
[  822.340033]  [<ffffffff810e85ab>] ? __lru_cache_add+0x1a/0x3a
[  822.340033]  [<ffffffff81087ea9>] ? current_kernel_time64+0x9/0x30
[  822.340033]  [<ffffffff813940c4>] ? __sys_sendmsg+0x3c/0x5a
[  822.340033]  [<ffffffff8148f597>] ?
entry_SYSCALL_64_fastpath+0x12/0x6a
[  822.340033] Code: 83 08 04 00 00 65 ff 00 48 8b 3c 24 e8 40 7c f2 ff
eb 13 48 c7 c3 9f ff ff ff eb 0f 89 ce e8 f1 ae f1 ff 48 89 c3 48 85 db
74 15 <48> 8b 83 08 04 00 00 65 ff 08 48 81 fb 00 f0 ff ff 76 0d eb 07
[  822.340033] RIP  [<ffffffff8148781e>] mpls_nh_assign_dev+0x10b/0x182
[  822.340033]  RSP <ffff88001dad7a88>
[  822.340033] CR2: 00000000000003a3
[  822.435363] ---[ end trace 98cc65e6f6b8bf11 ]---

After patch:
$ip -f mpls route add 100 as 200 via inet 10.1.1.8
RTNETLINK answers: Network is unreachable

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mpls/af_mpls.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c32fc411a911..881bc2072809 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -518,6 +518,9 @@ static struct net_device *find_outdev(struct net *net,
 	if (!dev)
 		return ERR_PTR(-ENODEV);
 
+	if (IS_ERR(dev))
+		return dev;
+
 	/* The caller is holding rtnl anyways, so release the dev reference */
 	dev_put(dev);
 

From 4337f6e548c1203c626740639b6826981d26beee Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 12 Apr 2016 12:27:08 +0200
Subject: [PATCH 389/797] USB: uas: Limit qdepth at the scsi-host level

commit 198de51dbc3454d95b015ca0a055b673f85f01bb upstream.

Commit 64d513ac31bd ("scsi: use host wide tags by default") causes
the SCSI core to queue more commands than we can handle on devices with
multiple LUNs. Limit the queue depth at the scsi-host level instead of
per slave to fix this.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1315013
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/uas.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index ce0cd6e20d4f..31872bcb0ad8 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -800,7 +800,6 @@ static int uas_slave_configure(struct scsi_device *sdev)
 	if (devinfo->flags & US_FL_BROKEN_FUA)
 		sdev->broken_fua = 1;
 
-	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }
 
@@ -932,6 +931,12 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	if (result)
 		goto set_alt0;
 
+	/*
+	 * 1 tag is reserved for untagged commands +
+	 * 1 tag to avoid off by one errors in some bridge firmwares
+	 */
+	shost->can_queue = devinfo->qdepth - 2;
+
 	usb_set_intfdata(intf, shost);
 	result = scsi_add_host(shost, &intf->dev);
 	if (result)

From f9a6b3caddf3ab9b9b490648018c8b02de2171f2 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 12 Apr 2016 12:27:09 +0200
Subject: [PATCH 390/797] USB: uas: Add a new NO_REPORT_LUNS quirk

commit 1363074667a6b7d0507527742ccd7bbed5e3ceaa upstream.

Add a new NO_REPORT_LUNS quirk and set it for Seagate drives with
a usb-id of 0bc2:331a, as these will fail to respond to a
REPORT_LUNS command.

Reported-and-tested-by: David Webb <djw@noc.ac.uk>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt |  2 ++
 drivers/usb/storage/uas.c           | 14 +++++++++++++-
 drivers/usb/storage/unusual_uas.h   |  7 +++++++
 drivers/usb/storage/usb.c           |  5 ++++-
 include/linux/usb_usual.h           |  2 ++
 5 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 742f69d18fc8..0e4102ae1a61 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3928,6 +3928,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					sector if the number is odd);
 				i = IGNORE_DEVICE (don't bind to this
 					device);
+				j = NO_REPORT_LUNS (don't use report luns
+					command, uas only);
 				l = NOT_LOCKABLE (don't try to lock and
 					unlock ejectable media);
 				m = MAX_SECTORS_64 (don't transfer more
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 31872bcb0ad8..9baf081174ce 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -2,7 +2,7 @@
  * USB Attached SCSI
  * Note that this is not the same as the USB Mass Storage driver
  *
- * Copyright Hans de Goede <hdegoede@redhat.com> for Red Hat, Inc. 2013 - 2014
+ * Copyright Hans de Goede <hdegoede@redhat.com> for Red Hat, Inc. 2013 - 2016
  * Copyright Matthew Wilcox for Intel Corp, 2010
  * Copyright Sarah Sharp for Intel Corp, 2010
  *
@@ -757,6 +757,17 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd)
 	return SUCCESS;
 }
 
+static int uas_target_alloc(struct scsi_target *starget)
+{
+	struct uas_dev_info *devinfo = (struct uas_dev_info *)
+			dev_to_shost(starget->dev.parent)->hostdata;
+
+	if (devinfo->flags & US_FL_NO_REPORT_LUNS)
+		starget->no_report_luns = 1;
+
+	return 0;
+}
+
 static int uas_slave_alloc(struct scsi_device *sdev)
 {
 	struct uas_dev_info *devinfo =
@@ -807,6 +818,7 @@ static struct scsi_host_template uas_host_template = {
 	.module = THIS_MODULE,
 	.name = "uas",
 	.queuecommand = uas_queuecommand,
+	.target_alloc = uas_target_alloc,
 	.slave_alloc = uas_slave_alloc,
 	.slave_configure = uas_slave_configure,
 	.eh_abort_handler = uas_eh_abort_handler,
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index ccc113e83d88..53341a77d89f 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -64,6 +64,13 @@ UNUSUAL_DEV(0x0bc2, 0x3312, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_ATA_1X),
 
+/* Reported-by: David Webb <djw@noc.ac.uk> */
+UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999,
+		"Seagate",
+		"Expansion Desk",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_REPORT_LUNS),
+
 /* Reported-by: Hans de Goede <hdegoede@redhat.com> */
 UNUSUAL_DEV(0x0bc2, 0x3320, 0x0000, 0x9999,
 		"Seagate",
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 43576ed31ccd..9de988a0f856 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -482,7 +482,7 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags)
 			US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 |
 			US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE |
 			US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES |
-			US_FL_MAX_SECTORS_240);
+			US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS);
 
 	p = quirks;
 	while (*p) {
@@ -532,6 +532,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags)
 		case 'i':
 			f |= US_FL_IGNORE_DEVICE;
 			break;
+		case 'j':
+			f |= US_FL_NO_REPORT_LUNS;
+			break;
 		case 'l':
 			f |= US_FL_NOT_LOCKABLE;
 			break;
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 7f5f78bd15ad..245f57dbbb61 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -79,6 +79,8 @@
 		/* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */	\
 	US_FLAG(MAX_SECTORS_240,	0x08000000)		\
 		/* Sets max_sectors to 240 */			\
+	US_FLAG(NO_REPORT_LUNS,	0x10000000)			\
+		/* Cannot handle REPORT_LUNS */			\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };

From ac1373d660d5ceb65ee20e3fd9de727579efe98f Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 7 Jan 2016 11:01:00 +0100
Subject: [PATCH 391/797] cdc-acm: fix NULL pointer reference

commit 29c6dd591bbd592472247441de9fa694acdabae8 upstream.

The union descriptor must be checked. Its usage was conditional
before the parser was introduced. This is important, because
many RNDIS devices, which also use the common parser, have
bogus extra descriptors.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Tested-by: Vasily Galkin <galkin-vv@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/cdc_ether.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 3da70bf9936a..7cba2c3759df 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -160,6 +160,12 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	info->u = header.usb_cdc_union_desc;
 	info->header = header.usb_cdc_header_desc;
 	info->ether = header.usb_cdc_ether_desc;
+	if (!info->u) {
+		if (rndis)
+			goto skip;
+		else /* in that case a quirk is mandatory */
+			goto bad_desc;
+	}
 	/* we need a master/control interface (what we're
 	 * probed with) and a slave/data interface; union
 	 * descriptors sort this all out.
@@ -256,7 +262,7 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 			goto bad_desc;
 		}
 
-	} else if (!info->header || !info->u || (!rndis && !info->ether)) {
+	} else if (!info->header || (!rndis && !info->ether)) {
 		dev_dbg(&intf->dev, "missing cdc %s%s%sdescriptor\n",
 			info->header ? "" : "header ",
 			info->u ? "" : "union ",

From 30d6a9fd6c11bd7b9e8f673da0f9caa22e2280fa Mon Sep 17 00:00:00 2001
From: Yuki Shibuya <shibuya.yk@ncos.nec.co.jp>
Date: Thu, 24 Mar 2016 05:17:03 +0000
Subject: [PATCH 392/797] KVM: x86: Inject pending interrupt even if pending
 nmi exist

commit 321c5658c5e9192dea0d58ab67cf1791e45b2b26 upstream.

Non-maskable interrupts (NMIs) are preferred to interrupts in the
current implementation. If an NMI is pending and the NMI is blocked
(nmi_allowed() returns false), the pending interrupt is not injected
and enable_irq_window() is not executed, even if interrupt injection
is allowed.

In old kernels (e.g. 2.6.32), schedule() is often called in NMI context.
In this case, interrupts are needed to execute the iret that ends the
NMI. The flag blocking new NMIs is not cleared until the guest
executes the iret, and interrupts are blocked by the pending NMI. Due
to this, iret can't be invoked in the guest, and the guest is starved
until the block is cleared by some event (e.g. canceling injection).

This patch injects pending interrupts, when allowed, even if NMI
is blocked. Also, if an interrupt is pending after executing
inject_pending_event(), enable_irq_window() is executed regardless of
the NMI pending counter.

Signed-off-by: Yuki Shibuya <shibuya.yk@ncos.nec.co.jp>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8bfc5fc6a39b..7eb4ebd3ebea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6024,12 +6024,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	}
 
 	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (kvm_x86_ops->nmi_allowed(vcpu)) {
-			--vcpu->arch.nmi_pending;
-			vcpu->arch.nmi_injected = true;
-			kvm_x86_ops->set_nmi(vcpu);
-		}
+	if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+		--vcpu->arch.nmi_pending;
+		vcpu->arch.nmi_injected = true;
+		kvm_x86_ops->set_nmi(vcpu);
 	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
 		/*
 		 * Because interrupts can be injected asynchronously, we are
@@ -6474,10 +6472,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (inject_pending_event(vcpu, req_int_win) != 0)
 			req_immediate_exit = true;
 		/* enable NMI/IRQ window open exits if needed */
-		else if (vcpu->arch.nmi_pending)
-			kvm_x86_ops->enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-			kvm_x86_ops->enable_irq_window(vcpu);
+		else {
+			if (vcpu->arch.nmi_pending)
+				kvm_x86_ops->enable_nmi_window(vcpu);
+			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
+				kvm_x86_ops->enable_irq_window(vcpu);
+		}
 
 		if (kvm_lapic_enabled(vcpu)) {
 			update_cr8_intercept(vcpu);

From 84f2443e213727323064e581e281d778831fa859 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 29 Mar 2016 17:56:57 +0200
Subject: [PATCH 393/797] KVM: x86: reduce default value of halt_poll_ns
 parameter

commit 14ebda3394fd3e5388747e742e510b0802a65d24 upstream.

Windows lets applications choose the frequency of the timer tick,
and in Windows 10 the maximum rate was changed from 1024 Hz to
2048 Hz.  Unfortunately, because of the way the Windows API
works, most applications who need a higher rate than the default
64 Hz will just do

   timeGetDevCaps(&tc, sizeof(tc));
   timeBeginPeriod(tc.wPeriodMin);

and pick the maximum rate.  This causes very high CPU usage when
playing media or games on Windows 10, even if the guest does not
actually use the CPU very much, because the frequent timer tick
causes halt_poll_ns to kick in.

There is no really good solution, especially because Microsoft
could sooner or later bump the limit to 4096 Hz, but for now
the best we can do is lower a bit the upper limit for
halt_poll_ns. :-(

Reported-by: Jon Panozzo <jonp@lime-technology.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 30cfd64295a0..9d2abb2a41d2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -41,7 +41,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
-#define KVM_HALT_POLL_NS_DEFAULT 500000
+#define KVM_HALT_POLL_NS_DEFAULT 400000
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 

From af22bb95d98c2df7eadecc9445c275b8847f54a9 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 3 Feb 2016 03:35:49 +0000
Subject: [PATCH 394/797] MIPS: Fix MSA ld unaligned failure cases

commit fa8ff601d72bad3078ddf5ef17a5547700d06908 upstream.

Copying the content of an MSA vector from user memory may involve TLB
faults & mapping in pages. This will fail when preemption is disabled
due to an inability to acquire mmap_sem from do_page_fault, which meant
such vector loads to unmapped pages would always fail to be emulated.
Fix this by disabling preemption later only around the updating of
vector register state.

This change does however introduce a race between performing the load
into thread context & the thread being preempted, saving its current
live context & clobbering the loaded value. This should be a rare
occurrence, so optimise for the fast path by simply repeating the load if
we are preempted.

Additionally if the copy failed then the failure path was taken with
preemption left disabled, leading to the kernel typically encountering
further issues around sleeping whilst atomic. The change to where
preemption is disabled avoids this issue.

Fixes: e4aa1f153add "MIPS: MSA unaligned memory access support"
Reported-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Cc: James Cowgill <James.Cowgill@imgtec.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/12345/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/unaligned.c | 49 +++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 490cea569d57..5c62065cbf22 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -885,7 +885,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 {
 	union mips_instruction insn;
 	unsigned long value;
-	unsigned int res;
+	unsigned int res, preempted;
 	unsigned long origpc;
 	unsigned long orig31;
 	void __user *fault_addr = NULL;
@@ -1226,27 +1226,36 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 			if (!access_ok(VERIFY_READ, addr, sizeof(*fpr)))
 				goto sigbus;
 
-			/*
-			 * Disable preemption to avoid a race between copying
-			 * state from userland, migrating to another CPU and
-			 * updating the hardware vector register below.
-			 */
-			preempt_disable();
+			do {
+				/*
+				 * If we have live MSA context keep track of
+				 * whether we get preempted in order to avoid
+				 * the register context we load being clobbered
+				 * by the live context as it's saved during
+				 * preemption. If we don't have live context
+				 * then it can't be saved to clobber the value
+				 * we load.
+				 */
+				preempted = test_thread_flag(TIF_USEDMSA);
 
-			res = __copy_from_user_inatomic(fpr, addr,
-							sizeof(*fpr));
-			if (res)
-				goto fault;
+				res = __copy_from_user_inatomic(fpr, addr,
+								sizeof(*fpr));
+				if (res)
+					goto fault;
 
-			/*
-			 * Update the hardware register if it is in use by the
-			 * task in this quantum, in order to avoid having to
-			 * save & restore the whole vector context.
-			 */
-			if (test_thread_flag(TIF_USEDMSA))
-				write_msa_wr(wd, fpr, df);
-
-			preempt_enable();
+				/*
+				 * Update the hardware register if it is in use
+				 * by the task in this quantum, in order to
+				 * avoid having to save & restore the whole
+				 * vector context.
+				 */
+				preempt_disable();
+				if (test_thread_flag(TIF_USEDMSA)) {
+					write_msa_wr(wd, fpr, df);
+					preempted = 0;
+				}
+				preempt_enable();
+			} while (preempted);
 			break;
 
 		case msa_st_op:

From 01f083c7e37adf613dca8e7177b460fc3e0a3e56 Mon Sep 17 00:00:00 2001
From: Govindraj Raja <Govindraj.Raja@imgtec.com>
Date: Fri, 4 Mar 2016 15:28:22 +0000
Subject: [PATCH 395/797] pinctrl: pistachio: fix mfio84-89 function
 description and pinmux.

commit e9adb336d0bf391be23e820975ca5cd12c31d781 upstream.

mfio 84 to 89 are described wrongly, fix it to describe
the right pin and add them to right pin-mux group.

The correct order is:
	pll1_lock => mips_pll	-- MFIO_83
	pll2_lock => audio_pll	-- MFIO_84
	pll3_lock => rpu_v_pll	-- MFIO_85
	pll4_lock => rpu_l_pll	-- MFIO_86
	pll5_lock => sys_pll	-- MFIO_87
	pll6_lock => wifi_pll	-- MFIO_88
	pll7_lock => bt_pll	-- MFIO_89

Cc: linux-gpio@vger.kernel.org
Cc: devicetree@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: James Hartley <James.Hartley@imgtec.com>
Fixes: cefc03e5995e("pinctrl: Add Pistachio SoC pin control driver")
Signed-off-by: Govindraj Raja <Govindraj.Raja@imgtec.com>
Acked-by: Andrew Bresticker <abrestic@chromium.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../pinctrl/img,pistachio-pinctrl.txt         | 12 +++++-----
 drivers/pinctrl/pinctrl-pistachio.c           | 24 +++++++++----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
index 08a4a32c8eb0..0326154c7925 100644
--- a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
@@ -134,12 +134,12 @@ mfio80		ddr_debug, mips_trace_data, mips_debug
 mfio81		dreq0, mips_trace_data, eth_debug
 mfio82		dreq1, mips_trace_data, eth_debug
 mfio83		mips_pll_lock, mips_trace_data, usb_debug
-mfio84		sys_pll_lock, mips_trace_data, usb_debug
-mfio85		wifi_pll_lock, mips_trace_data, sdhost_debug
-mfio86		bt_pll_lock, mips_trace_data, sdhost_debug
-mfio87		rpu_v_pll_lock, dreq2, socif_debug
-mfio88		rpu_l_pll_lock, dreq3, socif_debug
-mfio89		audio_pll_lock, dreq4, dreq5
+mfio84		audio_pll_lock, mips_trace_data, usb_debug
+mfio85		rpu_v_pll_lock, mips_trace_data, sdhost_debug
+mfio86		rpu_l_pll_lock, mips_trace_data, sdhost_debug
+mfio87		sys_pll_lock, dreq2, socif_debug
+mfio88		wifi_pll_lock, dreq3, socif_debug
+mfio89		bt_pll_lock, dreq4, dreq5
 tck
 trstn
 tdi
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index 85c9046c690e..6b1a47f8c096 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -469,27 +469,27 @@ static const char * const pistachio_mips_pll_lock_groups[] = {
 	"mfio83",
 };
 
-static const char * const pistachio_sys_pll_lock_groups[] = {
+static const char * const pistachio_audio_pll_lock_groups[] = {
 	"mfio84",
 };
 
-static const char * const pistachio_wifi_pll_lock_groups[] = {
+static const char * const pistachio_rpu_v_pll_lock_groups[] = {
 	"mfio85",
 };
 
-static const char * const pistachio_bt_pll_lock_groups[] = {
+static const char * const pistachio_rpu_l_pll_lock_groups[] = {
 	"mfio86",
 };
 
-static const char * const pistachio_rpu_v_pll_lock_groups[] = {
+static const char * const pistachio_sys_pll_lock_groups[] = {
 	"mfio87",
 };
 
-static const char * const pistachio_rpu_l_pll_lock_groups[] = {
+static const char * const pistachio_wifi_pll_lock_groups[] = {
 	"mfio88",
 };
 
-static const char * const pistachio_audio_pll_lock_groups[] = {
+static const char * const pistachio_bt_pll_lock_groups[] = {
 	"mfio89",
 };
 
@@ -559,12 +559,12 @@ enum pistachio_mux_option {
 	PISTACHIO_FUNCTION_DREQ4,
 	PISTACHIO_FUNCTION_DREQ5,
 	PISTACHIO_FUNCTION_MIPS_PLL_LOCK,
+	PISTACHIO_FUNCTION_AUDIO_PLL_LOCK,
+	PISTACHIO_FUNCTION_RPU_V_PLL_LOCK,
+	PISTACHIO_FUNCTION_RPU_L_PLL_LOCK,
 	PISTACHIO_FUNCTION_SYS_PLL_LOCK,
 	PISTACHIO_FUNCTION_WIFI_PLL_LOCK,
 	PISTACHIO_FUNCTION_BT_PLL_LOCK,
-	PISTACHIO_FUNCTION_RPU_V_PLL_LOCK,
-	PISTACHIO_FUNCTION_RPU_L_PLL_LOCK,
-	PISTACHIO_FUNCTION_AUDIO_PLL_LOCK,
 	PISTACHIO_FUNCTION_DEBUG_RAW_CCA_IND,
 	PISTACHIO_FUNCTION_DEBUG_ED_SEC20_CCA_IND,
 	PISTACHIO_FUNCTION_DEBUG_ED_SEC40_CCA_IND,
@@ -620,12 +620,12 @@ static const struct pistachio_function pistachio_functions[] = {
 	FUNCTION(dreq4),
 	FUNCTION(dreq5),
 	FUNCTION(mips_pll_lock),
+	FUNCTION(audio_pll_lock),
+	FUNCTION(rpu_v_pll_lock),
+	FUNCTION(rpu_l_pll_lock),
 	FUNCTION(sys_pll_lock),
 	FUNCTION(wifi_pll_lock),
 	FUNCTION(bt_pll_lock),
-	FUNCTION(rpu_v_pll_lock),
-	FUNCTION(rpu_l_pll_lock),
-	FUNCTION(audio_pll_lock),
 	FUNCTION(debug_raw_cca_ind),
 	FUNCTION(debug_ed_sec20_cca_ind),
 	FUNCTION(debug_ed_sec40_cca_ind),

From 8536e37882600eaf011663aafb8abeb556bfca86 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 7 Mar 2016 19:40:57 +0100
Subject: [PATCH 396/797] pinctrl: sh-pfc: only use dummy states for non-DT
 platforms

commit 0129801be4b87226bf502f18f5a9eabd356d1058 upstream.

If pinctrl_provide_dummies() is used unconditionally, then the dummy
state will be used even on DT platforms when the "init" state was
intentionally left out. Instead of "default", the dummy "init" state
will then be used during probe. Thus, when probing an I2C controller on
cold boot, communication triggered by bus notifiers broke because the
pins were not initialized.

Do it like OMAP2: use the dummy state only for non-DT platforms.

Fixes: ef0eebc05130 ("drivers/pinctrl: Add the concept of an "init" state")
Reported-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/sh-pfc/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c
index 181ea98a63b7..2b0d70217bbd 100644
--- a/drivers/pinctrl/sh-pfc/core.c
+++ b/drivers/pinctrl/sh-pfc/core.c
@@ -545,7 +545,9 @@ static int sh_pfc_probe(struct platform_device *pdev)
 			return ret;
 	}
 
-	pinctrl_provide_dummies();
+	/* Enable dummy states for those platforms without pinctrl support */
+	if (!of_have_populated_dt())
+		pinctrl_provide_dummies();
 
 	ret = sh_pfc_init_ranges(pfc);
 	if (ret < 0)

From 6bf00fff3c7b0ee91ccb9257ffadeb2f3a335069 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 12 Mar 2016 19:44:57 +0100
Subject: [PATCH 397/797] pinctrl: sunxi: Fix A33 external interrupts not
 working

commit 5e7515ba78fff2f5407eaa2f97c1d5c07801ac3d upstream.

pinctrl-sun8i-a33.c (and the dts) declare only 2 interrupt banks,
whereas the closely related A23 has 3 banks. This matches the
datasheet for the A33, where only interrupt banks B and G are specified,
whereas the A23 has banks A, B and G.

However, the A33 being the A23 derivative that it is means that the
interrupt configure/status I/O addresses for the 2 banks it has are not
changed from the A23; in other words, they have the same addresses as if
bank A were still present. The sunxi pinctrl code, however, currently
tries to use the A23 bank A addresses for bank B, since it does not know
about the removed bank A.

Add a irq_bank_base parameter and use this where appropriate to take
the missing bank A into account.

This fixes external interrupts not working on the A33 (tested with
an i2c touchscreen controller which uses an external interrupt).

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c |  1 +
 drivers/pinctrl/sunxi/pinctrl-sunxi.c     | 17 ++++++++++-------
 drivers/pinctrl/sunxi/pinctrl-sunxi.h     | 21 +++++++++++----------
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
index 00265f0435a7..8b381d69df86 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
@@ -485,6 +485,7 @@ static const struct sunxi_pinctrl_desc sun8i_a33_pinctrl_data = {
 	.pins = sun8i_a33_pins,
 	.npins = ARRAY_SIZE(sun8i_a33_pins),
 	.irq_banks = 2,
+	.irq_bank_base = 1,
 };
 
 static int sun8i_a33_pinctrl_probe(struct platform_device *pdev)
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index dead97daca35..a4a5b504c532 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -578,7 +578,7 @@ static void sunxi_pinctrl_irq_release_resources(struct irq_data *d)
 static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 reg = sunxi_irq_cfg_reg(d->hwirq);
+	u32 reg = sunxi_irq_cfg_reg(d->hwirq, pctl->desc->irq_bank_base);
 	u8 index = sunxi_irq_cfg_offset(d->hwirq);
 	unsigned long flags;
 	u32 regval;
@@ -625,7 +625,8 @@ static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type)
 static void sunxi_pinctrl_irq_ack(struct irq_data *d)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 status_reg = sunxi_irq_status_reg(d->hwirq);
+	u32 status_reg = sunxi_irq_status_reg(d->hwirq,
+					      pctl->desc->irq_bank_base);
 	u8 status_idx = sunxi_irq_status_offset(d->hwirq);
 
 	/* Clear the IRQ */
@@ -635,7 +636,7 @@ static void sunxi_pinctrl_irq_ack(struct irq_data *d)
 static void sunxi_pinctrl_irq_mask(struct irq_data *d)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
+	u32 reg = sunxi_irq_ctrl_reg(d->hwirq, pctl->desc->irq_bank_base);
 	u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
 	unsigned long flags;
 	u32 val;
@@ -652,7 +653,7 @@ static void sunxi_pinctrl_irq_mask(struct irq_data *d)
 static void sunxi_pinctrl_irq_unmask(struct irq_data *d)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
+	u32 reg = sunxi_irq_ctrl_reg(d->hwirq, pctl->desc->irq_bank_base);
 	u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
 	unsigned long flags;
 	u32 val;
@@ -744,7 +745,7 @@ static void sunxi_pinctrl_irq_handler(struct irq_desc *desc)
 	if (bank == pctl->desc->irq_banks)
 		return;
 
-	reg = sunxi_irq_status_reg_from_bank(bank);
+	reg = sunxi_irq_status_reg_from_bank(bank, pctl->desc->irq_bank_base);
 	val = readl(pctl->membase + reg);
 
 	if (val) {
@@ -1023,9 +1024,11 @@ int sunxi_pinctrl_init(struct platform_device *pdev,
 
 	for (i = 0; i < pctl->desc->irq_banks; i++) {
 		/* Mask and clear all IRQs before registering a handler */
-		writel(0, pctl->membase + sunxi_irq_ctrl_reg_from_bank(i));
+		writel(0, pctl->membase + sunxi_irq_ctrl_reg_from_bank(i,
+						pctl->desc->irq_bank_base));
 		writel(0xffffffff,
-			pctl->membase + sunxi_irq_status_reg_from_bank(i));
+		       pctl->membase + sunxi_irq_status_reg_from_bank(i,
+						pctl->desc->irq_bank_base));
 
 		irq_set_chained_handler_and_data(pctl->irq[i],
 						 sunxi_pinctrl_irq_handler,
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h
index e248e81a0f9e..0afce1ab12d0 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h
@@ -97,6 +97,7 @@ struct sunxi_pinctrl_desc {
 	int				npins;
 	unsigned			pin_base;
 	unsigned			irq_banks;
+	unsigned			irq_bank_base;
 	bool				irq_read_needs_mux;
 };
 
@@ -233,12 +234,12 @@ static inline u32 sunxi_pull_offset(u16 pin)
 	return pin_num * PULL_PINS_BITS;
 }
 
-static inline u32 sunxi_irq_cfg_reg(u16 irq)
+static inline u32 sunxi_irq_cfg_reg(u16 irq, unsigned bank_base)
 {
 	u8 bank = irq / IRQ_PER_BANK;
 	u8 reg = (irq % IRQ_PER_BANK) / IRQ_CFG_IRQ_PER_REG * 0x04;
 
-	return IRQ_CFG_REG + bank * IRQ_MEM_SIZE + reg;
+	return IRQ_CFG_REG + (bank_base + bank) * IRQ_MEM_SIZE + reg;
 }
 
 static inline u32 sunxi_irq_cfg_offset(u16 irq)
@@ -247,16 +248,16 @@ static inline u32 sunxi_irq_cfg_offset(u16 irq)
 	return irq_num * IRQ_CFG_IRQ_BITS;
 }
 
-static inline u32 sunxi_irq_ctrl_reg_from_bank(u8 bank)
+static inline u32 sunxi_irq_ctrl_reg_from_bank(u8 bank, unsigned bank_base)
 {
-	return IRQ_CTRL_REG + bank * IRQ_MEM_SIZE;
+	return IRQ_CTRL_REG + (bank_base + bank) * IRQ_MEM_SIZE;
 }
 
-static inline u32 sunxi_irq_ctrl_reg(u16 irq)
+static inline u32 sunxi_irq_ctrl_reg(u16 irq, unsigned bank_base)
 {
 	u8 bank = irq / IRQ_PER_BANK;
 
-	return sunxi_irq_ctrl_reg_from_bank(bank);
+	return sunxi_irq_ctrl_reg_from_bank(bank, bank_base);
 }
 
 static inline u32 sunxi_irq_ctrl_offset(u16 irq)
@@ -265,16 +266,16 @@ static inline u32 sunxi_irq_ctrl_offset(u16 irq)
 	return irq_num * IRQ_CTRL_IRQ_BITS;
 }
 
-static inline u32 sunxi_irq_status_reg_from_bank(u8 bank)
+static inline u32 sunxi_irq_status_reg_from_bank(u8 bank, unsigned bank_base)
 {
-	return IRQ_STATUS_REG + bank * IRQ_MEM_SIZE;
+	return IRQ_STATUS_REG + (bank_base + bank) * IRQ_MEM_SIZE;
 }
 
-static inline u32 sunxi_irq_status_reg(u16 irq)
+static inline u32 sunxi_irq_status_reg(u16 irq, unsigned bank_base)
 {
 	u8 bank = irq / IRQ_PER_BANK;
 
-	return sunxi_irq_status_reg_from_bank(bank);
+	return sunxi_irq_status_reg_from_bank(bank, bank_base);
 }
 
 static inline u32 sunxi_irq_status_offset(u16 irq)

From e7c6abc464ac71ca734ea57a778872793d51ff29 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 24 Mar 2016 13:15:45 +0100
Subject: [PATCH 398/797] pinctrl: nomadik: fix pull debug print inversion

commit 6ee334559324a55725e22463de633b99ad99fcad upstream.

Pull up was reported as pull down and vice versa. Fix this.

Fixes: 8f1774a2a971 "pinctrl: nomadik: improve GPIO debug prints"
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/nomadik/pinctrl-nomadik.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index eebfae0c9b7c..f844b4ae7f79 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -995,7 +995,7 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s,
 		int val;
 
 		if (pull)
-			pullidx = data_out ? 1 : 2;
+			pullidx = data_out ? 2 : 1;
 
 		seq_printf(s, " gpio-%-3d (%-20.20s) in  %s %s",
 			   gpio,

From 2e638de516983b9575c845e878e1c41f5bcd2509 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Wed, 9 Mar 2016 02:45:36 +0200
Subject: [PATCH 399/797] pinctrl: freescale: imx: fix bogus check of
 of_iomap() return value

commit 9a4f424531dabd877259ae0071b8bcc4dede9eb5 upstream.

On the error path of_iomap() returns NULL, hence the IS_ERR() check is
invalid and may cause a NULL pointer dereference. This change fixes the
problem.

While we are here invert a device node check to simplify the code.

Fixes: 26d8cde5260b ("pinctrl: freescale: imx: add shared input select reg support")
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/freescale/pinctrl-imx.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index a5bb93987378..1029aa7889b5 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -726,19 +726,18 @@ int imx_pinctrl_probe(struct platform_device *pdev,
 
 	if (of_property_read_bool(dev_np, "fsl,input-sel")) {
 		np = of_parse_phandle(dev_np, "fsl,input-sel", 0);
-		if (np) {
-			ipctl->input_sel_base = of_iomap(np, 0);
-			if (IS_ERR(ipctl->input_sel_base)) {
-				of_node_put(np);
-				dev_err(&pdev->dev,
-					"iomuxc input select base address not found\n");
-				return PTR_ERR(ipctl->input_sel_base);
-			}
-		} else {
+		if (!np) {
 			dev_err(&pdev->dev, "iomuxc fsl,input-sel property not found\n");
 			return -EINVAL;
 		}
+
+		ipctl->input_sel_base = of_iomap(np, 0);
 		of_node_put(np);
+		if (!ipctl->input_sel_base) {
+			dev_err(&pdev->dev,
+				"iomuxc input select base address not found\n");
+			return -ENOMEM;
+		}
 	}
 
 	imx_pinctrl_desc.name = dev_name(&pdev->dev);

From 5046b85eef40f725f509cae12a954f445c9c3e59 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 22 Mar 2016 01:04:05 -0300
Subject: [PATCH 400/797] au0828: fix au0828_v4l2_close() dev_state race
 condition

commit ed940cd27416f9887864b95e1f8f8845aa9d6391 upstream.

au0828_v4l2_close() check for dev_state == DEV_DISCONNECTED will fail to
detect the device disconnected state correctly, if au0828_v4l2_open() runs
to set the DEV_INITIALIZED bit. A loop test of bind/unbind found this bug
by increasing the likelihood of au0828_v4l2_open() occurring while unbind
is in progress. When au0828_v4l2_close() fails to detect that the device
is in disconnect state, it attempts to power down the device and fails with
the following general protection fault:

[  260.992962] Call Trace:
[  260.993008]  [<ffffffffa0f80f0f>] ? xc5000_sleep+0x8f/0xd0 [xc5000]
[  260.993095]  [<ffffffffa0f6803c>] ? fe_standby+0x3c/0x50 [tuner]
[  260.993186]  [<ffffffffa0ef541c>] au0828_v4l2_close+0x53c/0x620 [au0828]
[  260.993298]  [<ffffffffa0d08ec0>] v4l2_release+0xf0/0x210 [videodev]
[  260.993382]  [<ffffffff81570f9c>] __fput+0x1fc/0x6c0
[  260.993449]  [<ffffffff815714ce>] ____fput+0xe/0x10
[  260.993519]  [<ffffffff8116eb83>] task_work_run+0x133/0x1f0
[  260.993602]  [<ffffffff810035d0>] exit_to_usermode_loop+0x140/0x170
[  260.993681]  [<ffffffff810061ca>] syscall_return_slowpath+0x16a/0x1a0
[  260.993754]  [<ffffffff82835fb3>] entry_SYSCALL_64_fastpath+0xa6/0xa8

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/au0828/au0828-video.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 45c622e234f7..36fde46ea2cf 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c
@@ -1007,7 +1007,7 @@ static int au0828_v4l2_close(struct file *filp)
 		del_timer_sync(&dev->vbi_timeout);
 	}
 
-	if (dev->dev_state == DEV_DISCONNECTED)
+	if (dev->dev_state & DEV_DISCONNECTED)
 		goto end;
 
 	if (dev->users == 1) {

From 797e21364c9323ccdb2e8e712d5ef66e7f277798 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Tue, 22 Mar 2016 09:21:57 -0300
Subject: [PATCH 401/797] au0828: Fix dev_state handling

commit e8e3039f5b941f7825d335f8ca11c12a8104db11 upstream.

The au0828 dev_state is actually a bit mask. It should not be
checked with "==" but, instead, with a logical AND. There are some
places where this was done incorrectly.

Fix that by replacing the dev_state set/clear/test with the
bitops.

As reviewed by Shuah:
	"Looks good. Tested running bind/unbind au0828 loop for 1000 times.
	Didn't see any problems and the v4l2_querycap() problem has been
	fixed with this patch.

	After the above test, ran bind/unbind snd_usb_audio 1000 times.
	Didn't see any problems. Generated media graph and the graph
	looks good."

Reviewed-by: Shuah Khan <shuahkh@osg.samsung.com>
Tested-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/au0828/au0828-core.c  |  2 +-
 drivers/media/usb/au0828/au0828-input.c |  4 +-
 drivers/media/usb/au0828/au0828-video.c | 63 ++++++++++++-------------
 drivers/media/usb/au0828/au0828.h       |  9 ++--
 4 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c
index 0934024fb89d..d91ded795c93 100644
--- a/drivers/media/usb/au0828/au0828-core.c
+++ b/drivers/media/usb/au0828/au0828-core.c
@@ -159,7 +159,7 @@ static void au0828_usb_disconnect(struct usb_interface *interface)
 	   Set the status so poll routines can check and avoid
 	   access after disconnect.
 	*/
-	dev->dev_state = DEV_DISCONNECTED;
+	set_bit(DEV_DISCONNECTED, &dev->dev_state);
 
 	au0828_rc_unregister(dev);
 	/* Digital TV */
diff --git a/drivers/media/usb/au0828/au0828-input.c b/drivers/media/usb/au0828/au0828-input.c
index b0f067971979..3d6687f0407d 100644
--- a/drivers/media/usb/au0828/au0828-input.c
+++ b/drivers/media/usb/au0828/au0828-input.c
@@ -130,7 +130,7 @@ static int au0828_get_key_au8522(struct au0828_rc *ir)
 	bool first = true;
 
 	/* do nothing if device is disconnected */
-	if (ir->dev->dev_state == DEV_DISCONNECTED)
+	if (test_bit(DEV_DISCONNECTED, &ir->dev->dev_state))
 		return 0;
 
 	/* Check IR int */
@@ -260,7 +260,7 @@ static void au0828_rc_stop(struct rc_dev *rc)
 	cancel_delayed_work_sync(&ir->work);
 
 	/* do nothing if device is disconnected */
-	if (ir->dev->dev_state != DEV_DISCONNECTED) {
+	if (!test_bit(DEV_DISCONNECTED, &ir->dev->dev_state)) {
 		/* Disable IR */
 		au8522_rc_clear(ir, 0xe0, 1 << 4);
 	}
diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 36fde46ea2cf..7b2fe1b56039 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c
@@ -104,14 +104,13 @@ static inline void print_err_status(struct au0828_dev *dev,
 
 static int check_dev(struct au0828_dev *dev)
 {
-	if (dev->dev_state & DEV_DISCONNECTED) {
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state)) {
 		pr_info("v4l2 ioctl: device not present\n");
 		return -ENODEV;
 	}
 
-	if (dev->dev_state & DEV_MISCONFIGURED) {
-		pr_info("v4l2 ioctl: device is misconfigured; "
-		       "close and open it again\n");
+	if (test_bit(DEV_MISCONFIGURED, &dev->dev_state)) {
+		pr_info("v4l2 ioctl: device is misconfigured; close and open it again\n");
 		return -EIO;
 	}
 	return 0;
@@ -519,8 +518,8 @@ static inline int au0828_isoc_copy(struct au0828_dev *dev, struct urb *urb)
 	if (!dev)
 		return 0;
 
-	if ((dev->dev_state & DEV_DISCONNECTED) ||
-	    (dev->dev_state & DEV_MISCONFIGURED))
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state) ||
+	    test_bit(DEV_MISCONFIGURED, &dev->dev_state))
 		return 0;
 
 	if (urb->status < 0) {
@@ -766,10 +765,10 @@ static int au0828_stream_interrupt(struct au0828_dev *dev)
 	int ret = 0;
 
 	dev->stream_state = STREAM_INTERRUPT;
-	if (dev->dev_state == DEV_DISCONNECTED)
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state))
 		return -ENODEV;
 	else if (ret) {
-		dev->dev_state = DEV_MISCONFIGURED;
+		set_bit(DEV_MISCONFIGURED, &dev->dev_state);
 		dprintk(1, "%s device is misconfigured!\n", __func__);
 		return ret;
 	}
@@ -958,7 +957,7 @@ static int au0828_v4l2_open(struct file *filp)
 	int ret;
 
 	dprintk(1,
-		"%s called std_set %d dev_state %d stream users %d users %d\n",
+		"%s called std_set %d dev_state %ld stream users %d users %d\n",
 		__func__, dev->std_set_in_tuner_core, dev->dev_state,
 		dev->streaming_users, dev->users);
 
@@ -977,7 +976,7 @@ static int au0828_v4l2_open(struct file *filp)
 		au0828_analog_stream_enable(dev);
 		au0828_analog_stream_reset(dev);
 		dev->stream_state = STREAM_OFF;
-		dev->dev_state |= DEV_INITIALIZED;
+		set_bit(DEV_INITIALIZED, &dev->dev_state);
 	}
 	dev->users++;
 	mutex_unlock(&dev->lock);
@@ -991,7 +990,7 @@ static int au0828_v4l2_close(struct file *filp)
 	struct video_device *vdev = video_devdata(filp);
 
 	dprintk(1,
-		"%s called std_set %d dev_state %d stream users %d users %d\n",
+		"%s called std_set %d dev_state %ld stream users %d users %d\n",
 		__func__, dev->std_set_in_tuner_core, dev->dev_state,
 		dev->streaming_users, dev->users);
 
@@ -1007,7 +1006,7 @@ static int au0828_v4l2_close(struct file *filp)
 		del_timer_sync(&dev->vbi_timeout);
 	}
 
-	if (dev->dev_state & DEV_DISCONNECTED)
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state))
 		goto end;
 
 	if (dev->users == 1) {
@@ -1036,7 +1035,7 @@ static void au0828_init_tuner(struct au0828_dev *dev)
 		.type = V4L2_TUNER_ANALOG_TV,
 	};
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	if (dev->std_set_in_tuner_core)
@@ -1108,7 +1107,7 @@ static int vidioc_querycap(struct file *file, void  *priv,
 	struct video_device *vdev = video_devdata(file);
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	strlcpy(cap->driver, "au0828", sizeof(cap->driver));
@@ -1151,7 +1150,7 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	f->fmt.pix.width = dev->width;
@@ -1170,7 +1169,7 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	return au0828_set_format(dev, VIDIOC_TRY_FMT, f);
@@ -1182,7 +1181,7 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *priv,
 	struct au0828_dev *dev = video_drvdata(file);
 	int rc;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	rc = check_dev(dev);
@@ -1204,7 +1203,7 @@ static int vidioc_s_std(struct file *file, void *priv, v4l2_std_id norm)
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	if (norm == dev->std)
@@ -1236,7 +1235,7 @@ static int vidioc_g_std(struct file *file, void *priv, v4l2_std_id *norm)
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	*norm = dev->std;
@@ -1259,7 +1258,7 @@ static int vidioc_enum_input(struct file *file, void *priv,
 		[AU0828_VMUX_DEBUG] = "tv debug"
 	};
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	tmp = input->index;
@@ -1289,7 +1288,7 @@ static int vidioc_g_input(struct file *file, void *priv, unsigned int *i)
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	*i = dev->ctrl_input;
@@ -1300,7 +1299,7 @@ static void au0828_s_input(struct au0828_dev *dev, int index)
 {
 	int i;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	switch (AUVI_INPUT(index).type) {
@@ -1385,7 +1384,7 @@ static int vidioc_g_audio(struct file *file, void *priv, struct v4l2_audio *a)
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	a->index = dev->ctrl_ainput;
@@ -1405,7 +1404,7 @@ static int vidioc_s_audio(struct file *file, void *priv, const struct v4l2_audio
 	if (a->index != dev->ctrl_ainput)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 	return 0;
 }
@@ -1417,7 +1416,7 @@ static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *t)
 	if (t->index != 0)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	strcpy(t->name, "Auvitek tuner");
@@ -1437,7 +1436,7 @@ static int vidioc_s_tuner(struct file *file, void *priv,
 	if (t->index != 0)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	au0828_init_tuner(dev);
@@ -1459,7 +1458,7 @@ static int vidioc_g_frequency(struct file *file, void *priv,
 
 	if (freq->tuner != 0)
 		return -EINVAL;
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 	freq->frequency = dev->ctrl_freq;
 	return 0;
@@ -1474,7 +1473,7 @@ static int vidioc_s_frequency(struct file *file, void *priv,
 	if (freq->tuner != 0)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	au0828_init_tuner(dev);
@@ -1500,7 +1499,7 @@ static int vidioc_g_fmt_vbi_cap(struct file *file, void *priv,
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	format->fmt.vbi.samples_per_line = dev->vbi_width;
@@ -1526,7 +1525,7 @@ static int vidioc_cropcap(struct file *file, void *priv,
 	if (cc->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	cc->bounds.left = 0;
@@ -1548,7 +1547,7 @@ static int vidioc_g_register(struct file *file, void *priv,
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	reg->val = au0828_read(dev, reg->reg);
@@ -1561,7 +1560,7 @@ static int vidioc_s_register(struct file *file, void *priv,
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	return au0828_writereg(dev, reg->reg, reg->val);
diff --git a/drivers/media/usb/au0828/au0828.h b/drivers/media/usb/au0828/au0828.h
index 60b59391ea2a..d1b6405a05a4 100644
--- a/drivers/media/usb/au0828/au0828.h
+++ b/drivers/media/usb/au0828/au0828.h
@@ -21,6 +21,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bitops.h>
 #include <linux/usb.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -122,9 +123,9 @@ enum au0828_stream_state {
 
 /* device state */
 enum au0828_dev_state {
-	DEV_INITIALIZED = 0x01,
-	DEV_DISCONNECTED = 0x02,
-	DEV_MISCONFIGURED = 0x04
+	DEV_INITIALIZED = 0,
+	DEV_DISCONNECTED = 1,
+	DEV_MISCONFIGURED = 2
 };
 
 struct au0828_dev;
@@ -248,7 +249,7 @@ struct au0828_dev {
 	int input_type;
 	int std_set_in_tuner_core;
 	unsigned int ctrl_input;
-	enum au0828_dev_state dev_state;
+	long unsigned int dev_state; /* defined at enum au0828_dev_state */;
 	enum au0828_stream_state stream_state;
 	wait_queue_head_t open;
 

From 4d4b032bd7bb43650a71dd804eb9bbb589d3d818 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 26 Feb 2016 08:21:35 -0300
Subject: [PATCH 402/797] coda: fix error path in case of missing pdata on
 non-DT platform

commit bc717d5e92c8c079280eb4acbe335c6f25041aa2 upstream.

If we bail out this early, v4l2_device_register() has not been called
yet, so no need to call v4l2_device_unregister().

Fixes: b7bd660a51f0 ("[media] coda: Call v4l2_device_unregister() from a single location")

Reported-by: Michael Olbrich <m.olbrich@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/coda/coda-common.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c
index 15516a6e3a39..323aad3c89de 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -2119,14 +2119,12 @@ static int coda_probe(struct platform_device *pdev)
 
 	pdev_id = of_id ? of_id->data : platform_get_device_id(pdev);
 
-	if (of_id) {
+	if (of_id)
 		dev->devtype = of_id->data;
-	} else if (pdev_id) {
+	else if (pdev_id)
 		dev->devtype = &coda_devdata[pdev_id->driver_data];
-	} else {
-		ret = -EINVAL;
-		goto err_v4l2_register;
-	}
+	else
+		return -EINVAL;
 
 	spin_lock_init(&dev->irqlock);
 	INIT_LIST_HEAD(&dev->instances);

From 5da980e3a596b34825678f5892d2e8a23626049b Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 9 Sep 2015 11:38:56 -0300
Subject: [PATCH 403/797] v4l: vsp1: Set the SRU CTRL0 register when starting
 the stream

commit f6acfcdc5b8cdc9ddd53a459361820b9efe958c4 upstream.

Commit 58f896d859ce ("[media] v4l: vsp1: sru: Make the intensity
controllable during streaming") refactored the stream start code and
removed the SRU CTRL0 register write by mistake. Add it back.

Fixes: 58f896d859ce ("[media] v4l: vsp1: sru: Make the intensity controllable during streaming")

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/vsp1/vsp1_sru.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/platform/vsp1/vsp1_sru.c b/drivers/media/platform/vsp1/vsp1_sru.c
index 6310acab60e7..d41ae950d1a1 100644
--- a/drivers/media/platform/vsp1/vsp1_sru.c
+++ b/drivers/media/platform/vsp1/vsp1_sru.c
@@ -154,6 +154,7 @@ static int sru_s_stream(struct v4l2_subdev *subdev, int enable)
 	mutex_lock(sru->ctrls.lock);
 	ctrl0 |= vsp1_sru_read(sru, VI6_SRU_CTRL0)
 	       & (VI6_SRU_CTRL0_PARAM0_MASK | VI6_SRU_CTRL0_PARAM1_MASK);
+	vsp1_sru_write(sru, VI6_SRU_CTRL0, ctrl0);
 	mutex_unlock(sru->ctrls.lock);
 
 	vsp1_sru_write(sru, VI6_SRU_CTRL1, VI6_SRU_CTRL1_PARAM5);

From e4f2138e3ce6dd2ab4f7cf0ebef8dd44320deedb Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Wed, 2 Mar 2016 10:34:43 +0100
Subject: [PATCH 404/797] pcmcia: db1xxx_ss: fix last irq_to_gpio user

commit e34b6fcf9b09ec9d93503edd5f81489791ffd602 upstream.

Remove the usage of the removed irq_to_gpio() function.  On pre-DB1200
boards, pass the actual carddetect GPIO number instead of the IRQ,
because we need the gpio to actually test card status (inserted or
not) and can get the irq number with gpio_to_irq() instead.

Tested on DB1300 and DB1500, this patch fixes PCMCIA on the DB1500,
which used irq_to_gpio().

Fixes: 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h")
Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-pcmcia@lists.infradead.org
Cc: Linux-MIPS <linux-mips@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/12747/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/alchemy/devboards/db1000.c | 18 ++++++++----------
 arch/mips/alchemy/devboards/db1550.c |  4 ++--
 drivers/pcmcia/db1xxx_ss.c           | 11 +++++++++--
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c
index bdeed9d13c6f..433c4b9a9f0a 100644
--- a/arch/mips/alchemy/devboards/db1000.c
+++ b/arch/mips/alchemy/devboards/db1000.c
@@ -503,15 +503,15 @@ int __init db1000_dev_setup(void)
 	if (board == BCSR_WHOAMI_DB1500) {
 		c0 = AU1500_GPIO2_INT;
 		c1 = AU1500_GPIO5_INT;
-		d0 = AU1500_GPIO0_INT;
-		d1 = AU1500_GPIO3_INT;
+		d0 = 0;	/* GPIO number, NOT irq! */
+		d1 = 3; /* GPIO number, NOT irq! */
 		s0 = AU1500_GPIO1_INT;
 		s1 = AU1500_GPIO4_INT;
 	} else if (board == BCSR_WHOAMI_DB1100) {
 		c0 = AU1100_GPIO2_INT;
 		c1 = AU1100_GPIO5_INT;
-		d0 = AU1100_GPIO0_INT;
-		d1 = AU1100_GPIO3_INT;
+		d0 = 0; /* GPIO number, NOT irq! */
+		d1 = 3; /* GPIO number, NOT irq! */
 		s0 = AU1100_GPIO1_INT;
 		s1 = AU1100_GPIO4_INT;
 
@@ -545,15 +545,15 @@ int __init db1000_dev_setup(void)
 	} else if (board == BCSR_WHOAMI_DB1000) {
 		c0 = AU1000_GPIO2_INT;
 		c1 = AU1000_GPIO5_INT;
-		d0 = AU1000_GPIO0_INT;
-		d1 = AU1000_GPIO3_INT;
+		d0 = 0; /* GPIO number, NOT irq! */
+		d1 = 3; /* GPIO number, NOT irq! */
 		s0 = AU1000_GPIO1_INT;
 		s1 = AU1000_GPIO4_INT;
 		platform_add_devices(db1000_devs, ARRAY_SIZE(db1000_devs));
 	} else if ((board == BCSR_WHOAMI_PB1500) ||
 		   (board == BCSR_WHOAMI_PB1500R2)) {
 		c0 = AU1500_GPIO203_INT;
-		d0 = AU1500_GPIO201_INT;
+		d0 = 1; /* GPIO number, NOT irq! */
 		s0 = AU1500_GPIO202_INT;
 		twosocks = 0;
 		flashsize = 64;
@@ -566,7 +566,7 @@ int __init db1000_dev_setup(void)
 		 */
 	} else if (board == BCSR_WHOAMI_PB1100) {
 		c0 = AU1100_GPIO11_INT;
-		d0 = AU1100_GPIO9_INT;
+		d0 = 9; /* GPIO number, NOT irq! */
 		s0 = AU1100_GPIO10_INT;
 		twosocks = 0;
 		flashsize = 64;
@@ -583,7 +583,6 @@ int __init db1000_dev_setup(void)
 	} else
 		return 0; /* unknown board, no further dev setup to do */
 
-	irq_set_irq_type(d0, IRQ_TYPE_EDGE_BOTH);
 	irq_set_irq_type(c0, IRQ_TYPE_LEVEL_LOW);
 	irq_set_irq_type(s0, IRQ_TYPE_LEVEL_LOW);
 
@@ -597,7 +596,6 @@ int __init db1000_dev_setup(void)
 		c0, d0, /*s0*/0, 0, 0);
 
 	if (twosocks) {
-		irq_set_irq_type(d1, IRQ_TYPE_EDGE_BOTH);
 		irq_set_irq_type(c1, IRQ_TYPE_LEVEL_LOW);
 		irq_set_irq_type(s1, IRQ_TYPE_LEVEL_LOW);
 
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index 5740bcfdfc7f..6c37b9326f41 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -514,7 +514,7 @@ static void __init db1550_devices(void)
 		AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
 		AU1000_PCMCIA_IO_PHYS_ADDR,
 		AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-		AU1550_GPIO3_INT, AU1550_GPIO0_INT,
+		AU1550_GPIO3_INT, 0,
 		/*AU1550_GPIO21_INT*/0, 0, 0);
 
 	db1x_register_pcmcia_socket(
@@ -524,7 +524,7 @@ static void __init db1550_devices(void)
 		AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x004400000 - 1,
 		AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004000000,
 		AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004010000 - 1,
-		AU1550_GPIO5_INT, AU1550_GPIO1_INT,
+		AU1550_GPIO5_INT, 1,
 		/*AU1550_GPIO22_INT*/0, 0, 1);
 
 	platform_device_register(&db1550_nand_dev);
diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c
index 4c2fa05b4589..944674ee3464 100644
--- a/drivers/pcmcia/db1xxx_ss.c
+++ b/drivers/pcmcia/db1xxx_ss.c
@@ -56,6 +56,7 @@ struct db1x_pcmcia_sock {
 	int	stschg_irq;	/* card-status-change irq */
 	int	card_irq;	/* card irq */
 	int	eject_irq;	/* db1200/pb1200 have these */
+	int	insert_gpio;	/* db1000 carddetect gpio */
 
 #define BOARD_TYPE_DEFAULT	0	/* most boards */
 #define BOARD_TYPE_DB1200	1	/* IRQs aren't gpios */
@@ -83,7 +84,7 @@ static int db1200_card_inserted(struct db1x_pcmcia_sock *sock)
 /* carddetect gpio: low-active */
 static int db1000_card_inserted(struct db1x_pcmcia_sock *sock)
 {
-	return !gpio_get_value(irq_to_gpio(sock->insert_irq));
+	return !gpio_get_value(sock->insert_gpio);
 }
 
 static int db1x_card_inserted(struct db1x_pcmcia_sock *sock)
@@ -457,9 +458,15 @@ static int db1x_pcmcia_socket_probe(struct platform_device *pdev)
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "card");
 	sock->card_irq = r ? r->start : 0;
 
-	/* insert: irq which triggers on card insertion/ejection */
+	/* insert: irq which triggers on card insertion/ejection
+	 * BIG FAT NOTE: on DB1000/1100/1500/1550 we pass a GPIO here!
+	 */
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "insert");
 	sock->insert_irq = r ? r->start : -1;
+	if (sock->board_type == BOARD_TYPE_DEFAULT) {
+		sock->insert_gpio = r ? r->start : -1;
+		sock->insert_irq = r ? gpio_to_irq(r->start) : -1;
+	}
 
 	/* stschg: irq which trigger on card status change (optional) */
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "stschg");

From 06e2d7dd90cbafd2d911f86785b69cec1bcd3b02 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Tue, 5 Apr 2016 11:13:39 +0200
Subject: [PATCH 405/797] rbd: use GFP_NOIO consistently for request
 allocations

commit 2224d879c7c0f85c14183ef82eb48bd875ceb599 upstream.

As of 5a60e87603c4c533492c515b7f62578189b03c9c, RBD object request
allocations are made via rbd_obj_request_create() with GFP_NOIO.
However, subsequent OSD request allocations in rbd_osd_req_create*()
use GFP_ATOMIC.

With heavy page cache usage (e.g. OSDs running on same host as krbd
client), rbd_osd_req_create() order-1 GFP_ATOMIC allocations have been
observed to fail, where direct reclaim would have allowed GFP_NOIO
allocations to succeed.

Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Neil Brown <neilb@suse.com>
Signed-off-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/rbd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 81ea69fee7ca..fbdddd6f94b8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1955,7 +1955,7 @@ static struct ceph_osd_request *rbd_osd_req_create(
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
 	osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false,
-					  GFP_ATOMIC);
+					  GFP_NOIO);
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
 
@@ -2004,7 +2004,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
 	rbd_dev = img_request->rbd_dev;
 	osdc = &rbd_dev->rbd_client->client->osdc;
 	osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops,
-						false, GFP_ATOMIC);
+						false, GFP_NOIO);
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
 
@@ -2506,7 +2506,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 					bio_chain_clone_range(&bio_list,
 								&bio_offset,
 								clone_size,
-								GFP_ATOMIC);
+								GFP_NOIO);
 			if (!obj_request->bio_list)
 				goto out_unwind;
 		} else if (type == OBJ_REQUEST_PAGES) {

From 5f6e35c2f9464c8b67b54ea5a919105c5f46f514 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 3 Apr 2016 15:23:37 +0300
Subject: [PATCH 406/797] virtio: virtio 1.0 cs04 spec compliance for reset

commit 05dbcb430795b2e1fb1d5c757f8619d3dbed0a1c upstream.

The spec says: after writing 0 to device_status, the driver MUST wait
for a read of device_status to return 0 before reinitializing the
device.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virtio/virtio_pci_modern.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8e5cf194cc0b..4469202eaa8e 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/delay.h>
 #define VIRTIO_PCI_NO_LEGACY
 #include "virtio_pci_common.h"
 
@@ -271,9 +272,13 @@ static void vp_reset(struct virtio_device *vdev)
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
 	vp_iowrite8(0, &vp_dev->common->device_status);
-	/* Flush out the status write, and flush in device writes,
-	 * including MSI-X interrupts, if any. */
-	vp_ioread8(&vp_dev->common->device_status);
+	/* After writing 0 to device_status, the driver MUST wait for a read of
+	 * device_status to return 0 before reinitializing the device.
+	 * This will flush out the status write, and flush in device writes,
+	 * including MSI-X interrupts, if any.
+	 */
+	while (vp_ioread8(&vp_dev->common->device_status))
+		msleep(1);
 	/* Flush pending VQ/configuration callbacks. */
 	vp_synchronize_vectors(vdev);
 }

From 7de4ebacd23756a6862e5691f326b807ccc4da91 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 31 Mar 2016 17:22:45 +0200
Subject: [PATCH 407/797] mac80211: properly deal with station hashtable insert
 errors

commit 62b14b241ca6f790a17ccd9dd9f62ce1b006d406 upstream.

The original hand-implemented hash-table in mac80211 couldn't result
in insertion errors, and while converting to rhashtable I evidently
forgot to check the errors.

This surfaced now only because Ben is adding many identical keys and
that resulted in hidden insertion errors.

Fixes: 7bedd0cfad4e1 ("mac80211: use rhashtable for station table")
Reported-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/sta_info.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f91d1873218c..4e2dbe52fd9c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -256,11 +256,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
 }
 
 /* Caller must hold local->sta_mtx */
-static void sta_info_hash_add(struct ieee80211_local *local,
-			      struct sta_info *sta)
+static int sta_info_hash_add(struct ieee80211_local *local,
+			     struct sta_info *sta)
 {
-	rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
-			       sta_rht_params);
+	return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
+				      sta_rht_params);
 }
 
 static void sta_deliver_ps_frames(struct work_struct *wk)
@@ -503,7 +503,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
 	set_sta_flag(sta, WLAN_STA_BLOCK_BA);
 
 	/* make the station visible */
-	sta_info_hash_add(local, sta);
+	err = sta_info_hash_add(local, sta);
+	if (err)
+		goto out_drop_sta;
 
 	list_add_tail_rcu(&sta->list, &local->sta_list);
 
@@ -538,6 +540,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
  out_remove:
 	sta_info_hash_del(local, sta);
 	list_del_rcu(&sta->list);
+ out_drop_sta:
 	local->num_sta--;
 	synchronize_net();
 	__cleanup_single_sta(sta);

From 9da98ac378b2aaccdaa02ba10c87123eb997ff22 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 Jan 2016 23:05:31 +0100
Subject: [PATCH 408/797] mac80211: avoid excessive stack usage in sta_info

commit 0ef049dc1167fe834d0ad5d63f89eddc5c70f6e4 upstream.

When CONFIG_OPTIMIZE_INLINING is set, the sta_info_insert_finish
function consumes more stack than normally, exceeding the
1024 byte limit on ARM:

net/mac80211/sta_info.c: In function 'sta_info_insert_finish':
net/mac80211/sta_info.c:561:1: error: the frame size of 1080 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]

It turns out that there are two functions that put a 'struct station_info'
on the stack: __sta_info_destroy_part2 and sta_info_insert_finish, and
this structure alone requires up to 792 bytes.

Hoping that both are called rarely enough, this replaces the
on-stack structure with a dynamic allocation, which unfortunately
requires some suboptimal error handling for out-of-memory.

The __sta_info_destroy_part2 function is actually affected by the
stack usage twice because it calls cfg80211_del_sta_sinfo(), which
has another instance of struct station_info on its stack.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 98b6218388e3 ("mac80211/cfg80211: add station events")
Fixes: 6f7a8d26e266 ("mac80211: send statistics with delete station event")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/sta_info.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4e2dbe52fd9c..67066d048e6f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -484,11 +484,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
 {
 	struct ieee80211_local *local = sta->local;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	struct station_info sinfo;
+	struct station_info *sinfo;
 	int err = 0;
 
 	lockdep_assert_held(&local->sta_mtx);
 
+	sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
+	if (!sinfo) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
 	/* check if STA exists already */
 	if (sta_info_get_bss(sdata, sta->sta.addr)) {
 		err = -EEXIST;
@@ -522,10 +528,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
 	ieee80211_sta_debugfs_add(sta);
 	rate_control_add_sta_debugfs(sta);
 
-	memset(&sinfo, 0, sizeof(sinfo));
-	sinfo.filled = 0;
-	sinfo.generation = local->sta_generation;
-	cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
+	sinfo->generation = local->sta_generation;
+	cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
+	kfree(sinfo);
 
 	sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr);
 
@@ -885,7 +890,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
 {
 	struct ieee80211_local *local = sta->local;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	struct station_info sinfo = {};
+	struct station_info *sinfo;
 	int ret;
 
 	/*
@@ -923,8 +928,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
 
 	sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
 
-	sta_set_sinfo(sta, &sinfo);
-	cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
+	sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+	if (sinfo)
+		sta_set_sinfo(sta, sinfo);
+	cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
+	kfree(sinfo);
 
 	rate_control_remove_sta_debugfs(sta);
 	ieee80211_sta_debugfs_remove(sta);

From 2c0824ac94b5bce8d07c2aa806e311ddfdcd46fe Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Mon, 25 Jan 2016 15:46:35 +0200
Subject: [PATCH 409/797] mac80211: fix ibss scan parameters

commit d321cd014e51baab475efbdec468255b9e0ec822 upstream.

When joining an IBSS, a full scan should be initiated in order to search
for an existing cell, unless the fixed_channel parameter was set.
A default channel on which to create the IBSS if no cell was found is
provided as well.
However, a scan is initiated only on the default channel provided,
regardless of whether ifibss->fixed_channel is set or not, with the
obvious result of not joining an existing IBSS cell that is on another
channel.

Fixes: 76bed0f43b27 ("mac80211: IBSS fix scan request")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/ibss.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 6a12b0f5cac8..980e9e9b6684 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -7,6 +7,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1484,14 +1485,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 
 		sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
 
-		num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
-							 &ifibss->chandef,
-							 channels,
-							 ARRAY_SIZE(channels));
 		scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
-		ieee80211_request_ibss_scan(sdata, ifibss->ssid,
-					    ifibss->ssid_len, channels, num,
-					    scan_width);
+
+		if (ifibss->fixed_channel) {
+			num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
+								 &ifibss->chandef,
+								 channels,
+								 ARRAY_SIZE(channels));
+			ieee80211_request_ibss_scan(sdata, ifibss->ssid,
+						    ifibss->ssid_len, channels,
+						    num, scan_width);
+		} else {
+			ieee80211_request_ibss_scan(sdata, ifibss->ssid,
+						    ifibss->ssid_len, NULL,
+						    0, scan_width);
+		}
 	} else {
 		int interval = IEEE80211_SCAN_INTERVAL;
 

From 542afcb4e070c96ddf87436b49034a0c89ccd95b Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Mon, 25 Jan 2016 14:43:24 +0100
Subject: [PATCH 410/797] mac80211: fix unnecessary frame drops in mesh fwding

commit cf44012810ccdd8fd947518e965cb04b7b8498be upstream.

The ieee80211_queue_stopped() expects hw queue
number but it was given raw WMM AC number instead.

This could cause frame drops and problems with
traffic in some cases - most notably if driver
doesn't map AC numbers to queue numbers 1:1 and
uses ieee80211_stop_queues() and
ieee80211_wake_queue() only without ever calling
ieee80211_wake_queues().

On ath10k it was possible to hit this problem in
the following case:

  1. wlan0 uses queue 0
     (ath10k maps queues per vif)
  2. offchannel uses queue 15
  3. queues 1-14 are unused
  4. ieee80211_stop_queues()
  5. ieee80211_wake_queue(q=0)
  6. ieee80211_wake_queue(q=15)
     (other queues are not woken up because both
      driver and mac80211 know other queues are
      unused)
  7. ieee80211_rx_h_mesh_fwding()
  8. ieee80211_select_queue_80211() returns 2
  9. ieee80211_queue_stopped(q=2) returns true
 10. frame is dropped (oops!)

Fixes: d3c1597b8d1b ("mac80211: fix forwarded mesh frame queue mapping")
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/rx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4cbf36cae806..a3bb8f7f5fc5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2250,7 +2250,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	struct ieee80211_local *local = rx->local;
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	u16 q, hdrlen;
+	u16 ac, q, hdrlen;
 
 	hdr = (struct ieee80211_hdr *) skb->data;
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2319,7 +2319,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	    ether_addr_equal(sdata->vif.addr, hdr->addr3))
 		return RX_CONTINUE;
 
-	q = ieee80211_select_queue_80211(sdata, skb, hdr);
+	ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+	q = sdata->vif.hw_queue[ac];
 	if (ieee80211_queue_stopped(&local->hw, q)) {
 		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
 		return RX_DROP_MONITOR;

From d70ab4ad8769944e2e01e678d1c04318d15a5332 Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Thu, 21 Jan 2016 14:23:07 +0100
Subject: [PATCH 411/797] mac80211: fix txq queue related crashes

commit 2a58d42c1e018ad514d4e23fd33fb2ded95d3ee6 upstream.

The driver can access the queue simultaneously
while mac80211 tears down the interface. Without
spinlock protection this could lead to corrupting
sk_buff_head and subsequently to an invalid
pointer dereference.

Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation")
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/iface.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index c9e325d2e120..7a2b7915093b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -977,7 +977,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	if (sdata->vif.txq) {
 		struct txq_info *txqi = to_txq_info(sdata->vif.txq);
 
+		spin_lock_bh(&txqi->queue.lock);
 		ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
+		spin_unlock_bh(&txqi->queue.lock);
+
 		atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
 	}
 

From f10d159204b6cdfba4d111f52a2668e960f8dd10 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Thu, 10 Mar 2016 11:30:14 +0900
Subject: [PATCH 412/797] usb: renesas_usbhs: avoid NULL pointer dereference in
 usbhsf_pkt_handler()

commit 894f2fc44f2f3f48c36c973b1123f6ab298be160 upstream.

When an unexpected situation occurred (e.g. a tx/rx irq happened while
the DMAC was in use), usbhsf_pkt_handler() could cause a NULL pointer
dereference like the following:

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 80000007 [#1] SMP ARM
Modules linked in: usb_f_acm u_serial g_serial libcomposite
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.5.0-rc6-00842-gac57066-dirty #63
Hardware name: Generic R8A7790 (Flattened Device Tree)
task: c0729c00 ti: c0724000 task.ti: c0724000
PC is at 0x0
LR is at usbhsf_pkt_handler+0xac/0x118
pc : [<00000000>]    lr : [<c03257e0>]    psr: 60000193
sp : c0725db8  ip : 00000000  fp : c0725df4
r10: 00000001  r9 : 00000193  r8 : ef3ccab4
r7 : ef3cca10  r6 : eea4586c  r5 : 00000000  r4 : ef19ceb4
r3 : 00000000  r2 : 0000009c  r1 : c0725dc4  r0 : ef19ceb4

This patch adds a condition to avoid the dereference.

Fixes: e73a989 ("usb: renesas_usbhs: add DMAEngine support")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/renesas_usbhs/fifo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index c0f5c652d272..32df73820726 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -190,7 +190,8 @@ static int usbhsf_pkt_handler(struct usbhs_pipe *pipe, int type)
 		goto __usbhs_pkt_handler_end;
 	}
 
-	ret = func(pkt, &is_done);
+	if (likely(func))
+		ret = func(pkt, &is_done);
 
 	if (is_done)
 		__usbhsf_pkt_del(pkt);

From 46e081b3015dd0d97e443babf0a7ec2058637d83 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Thu, 10 Mar 2016 11:30:15 +0900
Subject: [PATCH 413/797] usb: renesas_usbhs: disable TX IRQ before starting TX
 DMAC transfer

commit 6490865c67825277b29638e839850882600b48ec upstream.

This patch adds code to make sure the TX IRQ of the pipe is disabled
before starting a TX DMAC transfer. Otherwise, a lot of unnecessary TX
IRQs may happen in rare cases when the DMAC is used.

Fixes: e73a989 ("usb: renesas_usbhs: add DMAEngine support")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/renesas_usbhs/fifo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 32df73820726..f1893e08e51a 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -890,6 +890,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done)
 
 	pkt->trans = len;
 
+	usbhsf_tx_irq_ctrl(pipe, 0);
 	INIT_WORK(&pkt->work, xfer_work);
 	schedule_work(&pkt->work);
 

From 277882f94e1d2e82285eb3dc91944634174a9b18 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Mon, 4 Apr 2016 20:40:20 +0900
Subject: [PATCH 414/797] usb: renesas_usbhs: fix to avoid using a disabled ep
 in usbhsg_queue_done()

commit 4fccb0767fdbdb781a9c5b5c15ee7b219443c89d upstream.

This patch fixes an issue where usbhsg_queue_done() may cause a kernel
panic when the dma callback is running and usb_ep_disable() is called
by an interrupt handler. (In particular, this issue can be reproduced
using g_audio with the usb-dmac driver.)

An example flow:
 usbhsf_dma_complete (on tasklet)
  --> usbhsf_pkt_handler (on tasklet)
   --> usbhsg_queue_done (on tasklet)
    *** interrupt happened and usb_ep_disable() is called ***
    --> usbhsg_queue_pop (on tasklet)
     Then, oops happened.

Fixes: e73a989 ("usb: renesas_usbhs: add DMAEngine support")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/renesas_usbhs/mod_gadget.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 8f7a78e70975..fa14198daf77 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -158,10 +158,14 @@ static void usbhsg_queue_done(struct usbhs_priv *priv, struct usbhs_pkt *pkt)
 	struct usbhs_pipe *pipe = pkt->pipe;
 	struct usbhsg_uep *uep = usbhsg_pipe_to_uep(pipe);
 	struct usbhsg_request *ureq = usbhsg_pkt_to_ureq(pkt);
+	unsigned long flags;
 
 	ureq->req.actual = pkt->actual;
 
-	usbhsg_queue_pop(uep, ureq, 0);
+	usbhs_lock(priv, flags);
+	if (uep)
+		__usbhsg_queue_pop(uep, ureq, 0);
+	usbhs_unlock(priv, flags);
 }
 
 static void usbhsg_queue_push(struct usbhsg_uep *uep,

From 01e647a4aec55a156efccdf542579830fd1516eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 29 Mar 2016 22:27:27 +0200
Subject: [PATCH 415/797] iio: st_magn: always define ST_MAGN_TRIGGER_SET_STATE

commit 9b090a98e95c2530ef0ce474e3b6218621b8ae25 upstream.

When CONFIG_IIO_TRIGGER is enabled but CONFIG_IIO_BUFFER is
not, we get a build error in the st_magn driver:

drivers/iio/magnetometer/st_magn_core.c:573:23: error: 'ST_MAGN_TRIGGER_SET_STATE' undeclared here (not in a function)
  .set_trigger_state = ST_MAGN_TRIGGER_SET_STATE,
                       ^~~~~~~~~~~~~~~~~~~~~~~~~

Apparently, this ST_MAGN_TRIGGER_SET_STATE macro was meant to
be set to NULL when the definition is not available because
st_magn_buffer.c is not compiled, but the alternative definition
was not included in the original patch. This adds it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 74f5683f35fe ("iio: st_magn: Add irq trigger handling")
Acked-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/magnetometer/st_magn.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/magnetometer/st_magn.h b/drivers/iio/magnetometer/st_magn.h
index 06a4d9c35581..9daca4681922 100644
--- a/drivers/iio/magnetometer/st_magn.h
+++ b/drivers/iio/magnetometer/st_magn.h
@@ -44,6 +44,7 @@ static inline int st_magn_allocate_ring(struct iio_dev *indio_dev)
 static inline void st_magn_deallocate_ring(struct iio_dev *indio_dev)
 {
 }
+#define ST_MAGN_TRIGGER_SET_STATE NULL
 #endif /* CONFIG_IIO_BUFFER */
 
 #endif /* ST_MAGN_H */

From 82bd14ade909c156f297ad34f0e6935931c6149f Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@intel.com>
Date: Tue, 29 Mar 2016 15:35:45 +0300
Subject: [PATCH 416/797] iio: accel: bmc150: fix endianness when reading axes

commit 2215f31dc6f88634c1916362e922b1ecdce0a6b3 upstream.

For big endian platforms, reading the axes will return
invalid values.

The device stores each axis value in a 16 bit little
endian register. The driver uses regmap_bulk_read to get
the axis value, resulting in a 16 bit little endian value.
This needs to be converted to cpu endianness to work
on big endian platforms.

Fix endianness for big endian platforms by converting
the values for the axes read from little endian to
cpu.

This is also partially fixed in commit b6fb9b6d6552 ("iio:
accel: bmc150: optimize transfers in trigger handler").

Signed-off-by: Irina Tirdea <irina.tirdea@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/accel/bmc150-accel-core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index 2d33f1e821db..291c61a41c9a 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -547,7 +547,7 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data *data,
 {
 	int ret;
 	int axis = chan->scan_index;
-	unsigned int raw_val;
+	__le16 raw_val;
 
 	mutex_lock(&data->mutex);
 	ret = bmc150_accel_set_power_state(data, true);
@@ -557,14 +557,14 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data *data,
 	}
 
 	ret = regmap_bulk_read(data->regmap, BMC150_ACCEL_AXIS_TO_REG(axis),
-			       &raw_val, 2);
+			       &raw_val, sizeof(raw_val));
 	if (ret < 0) {
 		dev_err(data->dev, "Error reading axis %d\n", axis);
 		bmc150_accel_set_power_state(data, false);
 		mutex_unlock(&data->mutex);
 		return ret;
 	}
-	*val = sign_extend32(raw_val >> chan->scan_type.shift,
+	*val = sign_extend32(le16_to_cpu(raw_val) >> chan->scan_type.shift,
 			     chan->scan_type.realbits - 1);
 	ret = bmc150_accel_set_power_state(data, false);
 	mutex_unlock(&data->mutex);
@@ -988,6 +988,7 @@ static const struct iio_event_spec bmc150_accel_event = {
 		.realbits = (bits),					\
 		.storagebits = 16,					\
 		.shift = 16 - (bits),					\
+		.endianness = IIO_LE,					\
 	},								\
 	.event_spec = &bmc150_accel_event,				\
 	.num_event_specs = 1						\

From 5a103e7d6bf05ff4d053ee73a001d255bbc8951a Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@intel.com>
Date: Mon, 28 Mar 2016 20:15:46 +0300
Subject: [PATCH 417/797] iio: gyro: bmg160: fix buffer read values

commit b475c59b113db1e66eb9527ffdec3c5241c847e5 upstream.

When reading gyroscope axes using iio buffers, the values
returned are always 0. In the interrupt handler, the return
value of the read operation is returned to the user instead
of the value read. Return the value read to the user.

This is also fixed in commit 82d8e5da1a33 ("iio:
accel: bmg160: optimize transfers in trigger handler").

Signed-off-by: Irina Tirdea <irina.tirdea@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/gyro/bmg160_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
index 02ff789852a0..0d3edf97ec3a 100644
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -780,7 +780,7 @@ static irqreturn_t bmg160_trigger_handler(int irq, void *p)
 			mutex_unlock(&data->mutex);
 			goto err;
 		}
-		data->buffer[i++] = ret;
+		data->buffer[i++] = val;
 	}
 	mutex_unlock(&data->mutex);
 

From 51789682df3111fa675c1bc0a8498e142fb97bdf Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@intel.com>
Date: Tue, 29 Mar 2016 15:37:30 +0300
Subject: [PATCH 418/797] iio: gyro: bmg160: fix endianness when reading axes

commit 95e7ff034175db7d8aefabe7716c4d42bea24fde upstream.

For big endian platforms, reading the axes will return
invalid values.

The device stores each axis value in a 16 bit little
endian register. The driver uses regmap_bulk_read to get
the axis value, resulting in a 16 bit little endian value.
This needs to be converted to cpu endianness to work
on big endian platforms.

Fix endianness for big endian platforms by converting
the values for the axes read from little endian to
cpu.

This is also partially fixed in commit 82d8e5da1a33 ("iio:
accel: bmg160: optimize transfers in trigger handler").

Signed-off-by: Irina Tirdea <irina.tirdea@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/gyro/bmg160_core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
index 0d3edf97ec3a..acb3b303d800 100644
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -452,7 +452,7 @@ static int bmg160_get_temp(struct bmg160_data *data, int *val)
 static int bmg160_get_axis(struct bmg160_data *data, int axis, int *val)
 {
 	int ret;
-	unsigned int raw_val;
+	__le16 raw_val;
 
 	mutex_lock(&data->mutex);
 	ret = bmg160_set_power_state(data, true);
@@ -462,7 +462,7 @@ static int bmg160_get_axis(struct bmg160_data *data, int axis, int *val)
 	}
 
 	ret = regmap_bulk_read(data->regmap, BMG160_AXIS_TO_REG(axis), &raw_val,
-			       2);
+			       sizeof(raw_val));
 	if (ret < 0) {
 		dev_err(data->dev, "Error reading axis %d\n", axis);
 		bmg160_set_power_state(data, false);
@@ -470,7 +470,7 @@ static int bmg160_get_axis(struct bmg160_data *data, int axis, int *val)
 		return ret;
 	}
 
-	*val = sign_extend32(raw_val, 15);
+	*val = sign_extend32(le16_to_cpu(raw_val), 15);
 	ret = bmg160_set_power_state(data, false);
 	mutex_unlock(&data->mutex);
 	if (ret < 0)
@@ -733,6 +733,7 @@ static const struct iio_event_spec bmg160_event = {
 		.sign = 's',						\
 		.realbits = 16,					\
 		.storagebits = 16,					\
+		.endianness = IIO_LE,					\
 	},								\
 	.event_spec = &bmg160_event,					\
 	.num_event_specs = 1						\

From a7ac655b62b8855116049726347a11d054efd01b Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 28 Mar 2016 21:18:56 -0400
Subject: [PATCH 419/797] sd: Fix excessive capacity printing on devices with
 blocks bigger than 512 bytes

commit f08bb1e0dbdd0297258d0b8cd4dbfcc057e57b2a upstream.

During revalidate we check whether device capacity has changed before we
decide whether to output disk information or not.

The check for old capacity failed to take into account that we scaled
sdkp->capacity based on the reported logical block size. And therefore
the capacity test would always fail for devices with sectors bigger than
512 bytes and we would print several copies of the same discovery
information.

Avoid scaling sdkp->capacity and instead adjust the value on the fly
when setting the block device capacity and generating fake C/H/S
geometry.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reported-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ewan Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sd.c | 28 ++++++++--------------------
 drivers/scsi/sd.h |  7 ++++++-
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cc84ea7d09cc..0d7c6e86f149 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1275,18 +1275,19 @@ static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
 	struct scsi_device *sdp = sdkp->device;
 	struct Scsi_Host *host = sdp->host;
+	sector_t capacity = logical_to_sectors(sdp, sdkp->capacity);
 	int diskinfo[4];
 
 	/* default to most commonly used values */
-        diskinfo[0] = 0x40;	/* 1 << 6 */
-       	diskinfo[1] = 0x20;	/* 1 << 5 */
-       	diskinfo[2] = sdkp->capacity >> 11;
-	
+	diskinfo[0] = 0x40;	/* 1 << 6 */
+	diskinfo[1] = 0x20;	/* 1 << 5 */
+	diskinfo[2] = capacity >> 11;
+
 	/* override with calculated, extended default, or driver values */
 	if (host->hostt->bios_param)
-		host->hostt->bios_param(sdp, bdev, sdkp->capacity, diskinfo);
+		host->hostt->bios_param(sdp, bdev, capacity, diskinfo);
 	else
-		scsicam_bios_param(bdev, sdkp->capacity, diskinfo);
+		scsicam_bios_param(bdev, capacity, diskinfo);
 
 	geo->heads = diskinfo[0];
 	geo->sectors = diskinfo[1];
@@ -2337,14 +2338,6 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
 	if (sdkp->capacity > 0xffffffff)
 		sdp->use_16_for_rw = 1;
 
-	/* Rescale capacity to 512-byte units */
-	if (sector_size == 4096)
-		sdkp->capacity <<= 3;
-	else if (sector_size == 2048)
-		sdkp->capacity <<= 2;
-	else if (sector_size == 1024)
-		sdkp->capacity <<= 1;
-
 	blk_queue_physical_block_size(sdp->request_queue,
 				      sdkp->physical_block_size);
 	sdkp->device->sector_size = sector_size;
@@ -2812,11 +2805,6 @@ static int sd_try_extended_inquiry(struct scsi_device *sdp)
 	return 0;
 }
 
-static inline u32 logical_to_sectors(struct scsi_device *sdev, u32 blocks)
-{
-	return blocks << (ilog2(sdev->sector_size) - 9);
-}
-
 /**
  *	sd_revalidate_disk - called the first time a new disk is seen,
  *	performs disk spin up, read_capacity, etc.
@@ -2900,7 +2888,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	/* Combine with controller limits */
 	q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
 
-	set_capacity(disk, sdkp->capacity);
+	set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity));
 	sd_config_write_same(sdkp);
 	kfree(buffer);
 
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 5f2a84aff29f..654630bb7d0e 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -65,7 +65,7 @@ struct scsi_disk {
 	struct device	dev;
 	struct gendisk	*disk;
 	atomic_t	openers;
-	sector_t	capacity;	/* size in 512-byte sectors */
+	sector_t	capacity;	/* size in logical blocks */
 	u32		max_xfer_blocks;
 	u32		opt_xfer_blocks;
 	u32		max_ws_blocks;
@@ -146,6 +146,11 @@ static inline int scsi_medium_access_command(struct scsi_cmnd *scmd)
 	return 0;
 }
 
+static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blocks)
+{
+	return blocks << (ilog2(sdev->sector_size) - 9);
+}
+
 /*
  * A DIF-capable target device can be formatted with different
  * protection schemes.  Currently 0 through 3 are defined:

From c452dfc33274832a0f23b80ff2829b6fae9dd95d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Sat, 26 Mar 2016 16:14:37 -0400
Subject: [PATCH 420/797] fs: add file_dentry()

commit d101a125954eae1d397adda94ca6319485a50493 upstream.

This series fixes bugs in nfs and ext4 due to 4bacc9c9234c ("overlayfs:
Make f_path always point to the overlay and f_inode to the underlay").

Regular files opened on overlayfs will result in the file being opened on
the underlying filesystem, while f_path points to the overlayfs
mount/dentry.

This confuses filesystems which get the dentry from struct file and assume
it's theirs.

Add a new helper, file_dentry() [*], to get the filesystem's own dentry
from the file.  This checks file->f_path.dentry->d_flags against
DCACHE_OP_REAL, and returns file->f_path.dentry if DCACHE_OP_REAL is not
set (this is the common, non-overlayfs case).

In the uncommon case it will call into overlayfs's ->d_real() to get the
underlying dentry, matching file_inode(file).

The reason we need to check against the inode is that if the file is copied
up while being open, d_real() would return the upper dentry, while the open
file comes from the lower dentry.

[*] If possible, it's better simply to use file_inode() instead.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Daniel Axtens <dja@axtens.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c            |  5 ++++-
 fs/overlayfs/super.c   | 33 +++++++++++++++++++++++++++++++++
 include/linux/dcache.h | 10 ++++++++++
 include/linux/fs.h     | 10 ++++++++++
 4 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 877bcbbd03ff..18effa378f97 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1666,7 +1666,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 				DCACHE_OP_REVALIDATE	|
 				DCACHE_OP_WEAK_REVALIDATE	|
 				DCACHE_OP_DELETE	|
-				DCACHE_OP_SELECT_INODE));
+				DCACHE_OP_SELECT_INODE	|
+				DCACHE_OP_REAL));
 	dentry->d_op = op;
 	if (!op)
 		return;
@@ -1684,6 +1685,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 		dentry->d_flags |= DCACHE_OP_PRUNE;
 	if (op->d_select_inode)
 		dentry->d_flags |= DCACHE_OP_SELECT_INODE;
+	if (op->d_real)
+		dentry->d_flags |= DCACHE_OP_REAL;
 
 }
 EXPORT_SYMBOL(d_set_d_op);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 000b2ed05c29..a1acc6004a91 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -276,6 +276,37 @@ static void ovl_dentry_release(struct dentry *dentry)
 	}
 }
 
+static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode)
+{
+	struct dentry *real;
+
+	if (d_is_dir(dentry)) {
+		if (!inode || inode == d_inode(dentry))
+			return dentry;
+		goto bug;
+	}
+
+	real = ovl_dentry_upper(dentry);
+	if (real && (!inode || inode == d_inode(real)))
+		return real;
+
+	real = ovl_dentry_lower(dentry);
+	if (!real)
+		goto bug;
+
+	if (!inode || inode == d_inode(real))
+		return real;
+
+	/* Handle recursion */
+	if (real->d_flags & DCACHE_OP_REAL)
+		return real->d_op->d_real(real, inode);
+
+bug:
+	WARN(1, "ovl_d_real(%pd4, %s:%lu\n): real dentry not found\n", dentry,
+	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
+	return dentry;
+}
+
 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
@@ -320,11 +351,13 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 static const struct dentry_operations ovl_dentry_operations = {
 	.d_release = ovl_dentry_release,
 	.d_select_inode = ovl_d_select_inode,
+	.d_real = ovl_d_real,
 };
 
 static const struct dentry_operations ovl_reval_dentry_operations = {
 	.d_release = ovl_dentry_release,
 	.d_select_inode = ovl_d_select_inode,
+	.d_real = ovl_d_real,
 	.d_revalidate = ovl_dentry_revalidate,
 	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 };
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8a2e009c8a5a..f513dd855cb2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -161,6 +161,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(struct dentry *, bool);
 	struct inode *(*d_select_inode)(struct dentry *, unsigned);
+	struct dentry *(*d_real)(struct dentry *, struct inode *);
 } ____cacheline_aligned;
 
 /*
@@ -227,6 +228,7 @@ struct dentry_operations {
 #define DCACHE_MAY_FREE			0x00800000
 #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */
 #define DCACHE_OP_SELECT_INODE		0x02000000 /* Unioned entry: dcache op selects inode */
+#define DCACHE_OP_REAL			0x08000000
 
 extern seqlock_t rename_lock;
 
@@ -582,4 +584,12 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
 	return upper;
 }
 
+static inline struct dentry *d_real(struct dentry *dentry)
+{
+	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
+		return dentry->d_op->d_real(dentry, NULL);
+	else
+		return dentry;
+}
+
 #endif	/* __LINUX_DCACHE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 22c5a0cf16e3..ab3d8d9bb3ef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1207,6 +1207,16 @@ static inline struct inode *file_inode(const struct file *f)
 	return f->f_inode;
 }
 
+static inline struct dentry *file_dentry(const struct file *file)
+{
+	struct dentry *dentry = file->f_path.dentry;
+
+	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
+		return dentry->d_op->d_real(dentry, file_inode(file));
+	else
+		return dentry;
+}
+
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
 {
 	return locks_lock_inode_wait(file_inode(filp), fl);

From fda9797a6aaad1a8044614fbbdb265dda4328c41 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Sat, 26 Mar 2016 16:14:39 -0400
Subject: [PATCH 421/797] nfs: use file_dentry()

commit be62a1a8fd116f5cd9e53726601f970e16e17558 upstream.

NFS may be used as lower layer of overlayfs and accessing f_path.dentry can
lead to a crash.

Fix by replacing direct access of file->f_path.dentry with the
file_dentry() accessor, which will always return a native object.

Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay")
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: David Howells <dhowells@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/dir.c      | 6 +++---
 fs/nfs/inode.c    | 2 +-
 fs/nfs/nfs4file.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce5a21861074..5fc2162afb67 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -377,7 +377,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
  again:
 	timestamp = jiffies;
 	gencount = nfs_inc_attr_generation_counter();
-	error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
+	error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
 					  NFS_SERVER(inode)->dtsize, desc->plus);
 	if (error < 0) {
 		/* We requested READDIRPLUS, but the server doesn't grok it */
@@ -560,7 +560,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 		count++;
 
 		if (desc->plus != 0)
-			nfs_prime_dcache(desc->file->f_path.dentry, entry);
+			nfs_prime_dcache(file_dentry(desc->file), entry);
 
 		status = nfs_readdir_add_to_array(entry, page);
 		if (status != 0)
@@ -864,7 +864,7 @@ static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
  */
 static int nfs_readdir(struct file *file, struct dir_context *ctx)
 {
-	struct dentry	*dentry = file->f_path.dentry;
+	struct dentry	*dentry = file_dentry(file);
 	struct inode	*inode = d_inode(dentry);
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3e2071a177fd..f714b98cfd74 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -927,7 +927,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 {
 	struct nfs_open_context *ctx;
 
-	ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+	ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 	nfs_file_set_open_context(filp, ctx);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index db9b5fea5b3e..679e003818b1 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -26,7 +26,7 @@ static int
 nfs4_file_open(struct inode *inode, struct file *filp)
 {
 	struct nfs_open_context *ctx;
-	struct dentry *dentry = filp->f_path.dentry;
+	struct dentry *dentry = file_dentry(filp);
 	struct dentry *parent = NULL;
 	struct inode *dir;
 	unsigned openflags = filp->f_flags;
@@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
 	parent = dget_parent(dentry);
 	dir = d_inode(parent);
 
-	ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+	ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode);
 	err = PTR_ERR(ctx);
 	if (IS_ERR(ctx))
 		goto out;

From df041ded89c13d75f1408b768980fed31eef8dea Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 30 Mar 2016 19:03:13 -0400
Subject: [PATCH 422/797] btrfs: fix crash/invalid memory access on fsync when
 using overlayfs

commit de17e793b104d690e1d007dfc5cb6b4f649598ca upstream.

If the lower or upper directory of an overlayfs mount belong to a btrfs
file system and we fsync the file through the overlayfs' merged directory
we ended up accessing an inode that didn't belong to btrfs as if it were
a btrfs inode at btrfs_sync_file() resulting in a crash like the following:

[ 7782.588845] BUG: unable to handle kernel NULL pointer dereference at 0000000000000544
[ 7782.590624] IP: [<ffffffffa030b7ab>] btrfs_sync_file+0x11b/0x3e9 [btrfs]
[ 7782.591931] PGD 4d954067 PUD 1e878067 PMD 0
[ 7782.592016] Oops: 0002 [#6] PREEMPT SMP DEBUG_PAGEALLOC
[ 7782.592016] Modules linked in: btrfs overlay ppdev crc32c_generic evdev xor raid6_pq psmouse pcspkr sg serio_raw acpi_cpufreq parport_pc parport tpm_tis i2c_piix4 tpm i2c_core processor button loop autofs4 ext4 crc16 mbcache jbd2 sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio scsi_mod e1000 floppy [last unloaded: btrfs]
[ 7782.592016] CPU: 10 PID: 16437 Comm: xfs_io Tainted: G      D         4.5.0-rc6-btrfs-next-26+ #1
[ 7782.592016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
[ 7782.592016] task: ffff88001b8d40c0 ti: ffff880137488000 task.ti: ffff880137488000
[ 7782.592016] RIP: 0010:[<ffffffffa030b7ab>]  [<ffffffffa030b7ab>] btrfs_sync_file+0x11b/0x3e9 [btrfs]
[ 7782.592016] RSP: 0018:ffff88013748be40  EFLAGS: 00010286
[ 7782.592016] RAX: 0000000080000000 RBX: ffff880133b30c88 RCX: 0000000000000001
[ 7782.592016] RDX: 0000000000000001 RSI: ffffffff8148fec0 RDI: 00000000ffffffff
[ 7782.592016] RBP: ffff88013748bec0 R08: 0000000000000001 R09: 0000000000000000
[ 7782.624248] R10: ffff88013748be40 R11: 0000000000000246 R12: 0000000000000000
[ 7782.624248] R13: 0000000000000000 R14: 00000000009305a0 R15: ffff880015e3be40
[ 7782.624248] FS:  00007fa83b9cb700(0000) GS:ffff88023ed40000(0000) knlGS:0000000000000000
[ 7782.624248] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 7782.624248] CR2: 0000000000000544 CR3: 00000001fa652000 CR4: 00000000000006e0
[ 7782.624248] Stack:
[ 7782.624248]  ffffffff8108b5cc ffff88013748bec0 0000000000000246 ffff8800b005ded0
[ 7782.624248]  ffff880133b30d60 8000000000000000 7fffffffffffffff 0000000000000246
[ 7782.624248]  0000000000000246 ffffffff81074f9b ffffffff8104357c ffff880015e3be40
[ 7782.624248] Call Trace:
[ 7782.624248]  [<ffffffff8108b5cc>] ? arch_local_irq_save+0x9/0xc
[ 7782.624248]  [<ffffffff81074f9b>] ? ___might_sleep+0xce/0x217
[ 7782.624248]  [<ffffffff8104357c>] ? __do_page_fault+0x3c0/0x43a
[ 7782.624248]  [<ffffffff811a2351>] vfs_fsync_range+0x8c/0x9e
[ 7782.624248]  [<ffffffff811a237f>] vfs_fsync+0x1c/0x1e
[ 7782.624248]  [<ffffffff811a24d6>] do_fsync+0x31/0x4a
[ 7782.624248]  [<ffffffff811a2700>] SyS_fsync+0x10/0x14
[ 7782.624248]  [<ffffffff81493617>] entry_SYSCALL_64_fastpath+0x12/0x6b
[ 7782.624248] Code: 85 c0 0f 85 e2 02 00 00 48 8b 45 b0 31 f6 4c 29 e8 48 ff c0 48 89 45 a8 48 8d 83 d8 00 00 00 48 89 c7 48 89 45 a0 e8 fc 43 18 e1 <f0> 41 ff 84 24 44 05 00 00 48 8b 83 58 ff ff ff 48 c1 e8 07 83
[ 7782.624248] RIP  [<ffffffffa030b7ab>] btrfs_sync_file+0x11b/0x3e9 [btrfs]
[ 7782.624248]  RSP <ffff88013748be40>
[ 7782.624248] CR2: 0000000000000544
[ 7782.661994] ---[ end trace 721e14960eb939bc ]---

This started happening since commit 4bacc9c9234 (overlayfs: Make f_path
always point to the overlay and f_inode to the underlay) and even though
after this change we could still access the btrfs inode through
struct file->f_mapping->host or struct file->f_inode, we would still
run into similar issues later on at check_parent_dirs_for_sync()
because the dentry we got (from struct file->f_path.dentry) was from
overlayfs and not from btrfs, that is, we had no way of getting the dentry
that belonged to btrfs (we always got the dentry that belonged to
overlayfs).

The new patch from Miklos Szeredi, titled "vfs: add file_dentry()" and
recently submitted to linux-fsdevel, adds a file_dentry() API that allows
us to get the btrfs dentry from the input file and therefore be able
to fsync when the upper and lower directories belong to btrfs filesystems.

This issue has been reported several times by users in the mailing list
and bugzilla. A test case for xfstests is being submitted as well.

Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=101951
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109791
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0f09526aa7d9..5e5db3687e34 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1885,7 +1885,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
  */
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct dentry *dentry = file->f_path.dentry;
+	struct dentry *dentry = file_dentry(file);
 	struct inode *inode = d_inode(dentry);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;

From 321299a96e20cbc6aac615a4daae95f42235b467 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 Apr 2016 01:31:28 -0400
Subject: [PATCH 423/797] ext4: add lockdep annotations for i_data_sem

commit daf647d2dd58cec59570d7698a45b98e580f2076 upstream.

With the internal Quota feature, mke2fs creates empty quota inodes and
quota usage tracking is enabled as soon as the file system is mounted.
Since quotacheck is no longer preallocating all of the blocks in the
quota inode that are likely needed to be written to, we are now seeing
a lockdep false positive caused by needing to allocate a quota block
from inside ext4_map_blocks(), while holding i_data_sem for a data
inode.  This results in this complaint:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&ei->i_data_sem);
                                lock(&s->s_dquot.dqio_mutex);
                                lock(&ei->i_data_sem);
   lock(&s->s_dquot.dqio_mutex);

Google-Bug-Id: 27907753

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h        | 23 +++++++++++++++++++++++
 fs/ext4/move_extent.c | 11 +++++++++--
 fs/ext4/super.c       | 25 +++++++++++++++++++++++--
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7ca4e87144..d4156e1c128d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -849,6 +849,29 @@ do {									       \
 
 #include "extents_status.h"
 
+/*
+ * Lock subclasses for i_data_sem in the ext4_inode_info structure.
+ *
+ * These are needed to avoid lockdep false positives when we need to
+ * allocate blocks to the quota inode during ext4_map_blocks(), while
+ * holding i_data_sem for a normal (non-quota) inode.  Since we don't
+ * do quota tracking for the quota inode, this avoids deadlock (as
+ * well as infinite recursion, since it isn't turtles all the way
+ * down...)
+ *
+ *  I_DATA_SEM_NORMAL - Used for most inodes
+ *  I_DATA_SEM_OTHER  - Used by move_inode.c for the second normal inode
+ *			  where the second inode has larger inode number
+ *			  than the first
+ *  I_DATA_SEM_QUOTA  - Used for quota inodes only
+ */
+enum {
+	I_DATA_SEM_NORMAL = 0,
+	I_DATA_SEM_OTHER,
+	I_DATA_SEM_QUOTA,
+};
+
+
 /*
  * fourth extended file system inode data in memory
  */
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e032a0423e35..9bdbf98240a0 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -60,10 +60,10 @@ ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
 {
 	if (first < second) {
 		down_write(&EXT4_I(first)->i_data_sem);
-		down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+		down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
 	} else {
 		down_write(&EXT4_I(second)->i_data_sem);
-		down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
+		down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
 
 	}
 }
@@ -483,6 +483,13 @@ mext_check_arguments(struct inode *orig_inode,
 		return -EBUSY;
 	}
 
+	if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) {
+		ext4_debug("ext4 move extent: The argument files should "
+			"not be quota files [ino:orig %lu, donor %lu]\n",
+			orig_inode->i_ino, donor_inode->i_ino);
+		return -EBUSY;
+	}
+
 	/* Ext4 move extent supports only extent based file */
 	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
 		ext4_debug("ext4 move extent: orig file is not extents "
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9ab67da6e5a..b54cb0025646 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4936,6 +4936,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
 					EXT4_SB(sb)->s_jquota_fmt, type);
 }
 
+static void lockdep_set_quota_inode(struct inode *inode, int subclass)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	/* The first argument of lockdep_set_subclass has to be
+	 * *exactly* the same as the argument to init_rwsem() --- in
+	 * this case, in init_once() --- or lockdep gets unhappy
+	 * because the name of the lock is set using the
+	 * stringification of the argument to init_rwsem().
+	 */
+	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
+	lockdep_set_subclass(&ei->i_data_sem, subclass);
+}
+
 /*
  * Standard function to be called on quota_on
  */
@@ -4975,8 +4989,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		if (err)
 			return err;
 	}
-
-	return dquot_quota_on(sb, type, format_id, path);
+	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
+	err = dquot_quota_on(sb, type, format_id, path);
+	if (err)
+		lockdep_set_quota_inode(path->dentry->d_inode,
+					     I_DATA_SEM_NORMAL);
+	return err;
 }
 
 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
@@ -5002,8 +5020,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 
 	/* Don't account quota for quota files to avoid recursion */
 	qf_inode->i_flags |= S_NOQUOTA;
+	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
 	err = dquot_enable(qf_inode, type, format_id, flags);
 	iput(qf_inode);
+	if (err)
+		lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
 
 	return err;
 }

From ee8516a130918aa1421d426ec978985240a672ed Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 3 Apr 2016 17:03:37 -0400
Subject: [PATCH 424/797] ext4: ignore quota mount options if the quota feature
 is enabled

commit c325a67c72903e1cc30e990a15ce745bda0dbfde upstream.

Previously, ext4 would fail the mount if the file system had the quota
feature enabled and quota mount options (used for the older quota
setups) were present.  This broke xfstests, since xfs silently ignores
the usrquota and grpquota mount options if they are specified.  This
commit changes things so that we are consistent with xfs; having the
mount options specified is harmless, so there is no sense in breaking
users by forbidding them.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b54cb0025646..ba1cf0bf2f81 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1292,9 +1292,9 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
 		return -1;
 	}
 	if (ext4_has_feature_quota(sb)) {
-		ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
-			 "when QUOTA feature is enabled");
-		return -1;
+		ext4_msg(sb, KERN_INFO, "Journaled quota options "
+			 "ignored when QUOTA feature is enabled");
+		return 1;
 	}
 	qname = match_strdup(args);
 	if (!qname) {
@@ -1657,10 +1657,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 			return -1;
 		}
 		if (ext4_has_feature_quota(sb)) {
-			ext4_msg(sb, KERN_ERR,
-				 "Cannot set journaled quota options "
+			ext4_msg(sb, KERN_INFO,
+				 "Quota format mount options ignored "
 				 "when QUOTA feature is enabled");
-			return -1;
+			return 1;
 		}
 		sbi->s_jquota_fmt = m->mount_opt;
 #endif
@@ -1721,11 +1721,11 @@ static int parse_options(char *options, struct super_block *sb,
 #ifdef CONFIG_QUOTA
 	if (ext4_has_feature_quota(sb) &&
 	    (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
-		ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
-			 "feature is enabled");
-		return 0;
-	}
-	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+		ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
+			 "mount options ignored.");
+		clear_opt(sb, USRQUOTA);
+		clear_opt(sb, GRPQUOTA);
+	} else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
 		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
 			clear_opt(sb, USRQUOTA);
 

From ab6c5069d1b14ac316b8c9f6aeda986dfe87fe47 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 4 Apr 2016 15:47:48 +0200
Subject: [PATCH 425/797] iommu: Don't overwrite domain pointer when there is
 no default_domain

commit eebb8034a5be8c2177cbf07ca2ecd2ff8a058958 upstream.

IOMMU drivers that do not support default domains, but make
use of the group->domain pointer can get that pointer
overwritten with NULL on device add/remove.

Make sure this can't happen by only overwriting the domain
pointer when it is NULL.

Fixes: 1228236de5f9 ('iommu: Move default domain allocation to iommu_group_get_for_dev()')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0e3b0092ec92..515bb8b80952 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -848,7 +848,8 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 	if (!group->default_domain) {
 		group->default_domain = __iommu_domain_alloc(dev->bus,
 							     IOMMU_DOMAIN_DMA);
-		group->domain = group->default_domain;
+		if (!group->domain)
+			group->domain = group->default_domain;
 	}
 
 	ret = iommu_group_add_device(group, dev);

From 1653a3b0e9436c10eb307c318776cf91fe18ff08 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 30 Mar 2016 23:37:21 +0100
Subject: [PATCH 426/797] Btrfs: fix file/data loss caused by fsync after
 rename and new inode

commit 56f23fdbb600e6087db7b009775b95ce07cc3195 upstream.

If we rename an inode A (be it a file or a directory), create a new
inode B with the old name of inode A and under the same parent directory,
fsync inode B and then power fail, at log tree replay time we end up
removing inode A completely. If inode A is a directory then all its files
are gone too.

Example scenarios where this happens:
This is reproducible with the following steps, taken from a couple of
test cases written for fstests which are going to be submitted upstream
soon:

   # Scenario 1

   mkfs.btrfs -f /dev/sdc
   mount /dev/sdc /mnt
   mkdir -p /mnt/a/x
   echo "hello" > /mnt/a/x/foo
   echo "world" > /mnt/a/x/bar
   sync
   mv /mnt/a/x /mnt/a/y
   mkdir /mnt/a/x
   xfs_io -c fsync /mnt/a/x
   <power failure happens>

   The next time the fs is mounted, log tree replay happens and
   the directory "y" does not exist nor do the files "foo" and
   "bar" exist anywhere (neither in "y" nor in "x", nor the root
   nor anywhere).

   # Scenario 2

   mkfs.btrfs -f /dev/sdc
   mount /dev/sdc /mnt
   mkdir /mnt/a
   echo "hello" > /mnt/a/foo
   sync
   mv /mnt/a/foo /mnt/a/bar
   echo "world" > /mnt/a/foo
   xfs_io -c fsync /mnt/a/foo
   <power failure happens>

   The next time the fs is mounted, log tree replay happens and the
   file "bar" does not exist anymore. A file with the name "foo"
   exists and it matches the second file we created.

Another related problem that does not involve file/data loss is when a
new inode is created with the name of a deleted snapshot and we fsync it:

   mkfs.btrfs -f /dev/sdc
   mount /dev/sdc /mnt
   mkdir /mnt/testdir
   btrfs subvolume snapshot /mnt /mnt/testdir/snap
   btrfs subvolume delete /mnt/testdir/snap
   rmdir /mnt/testdir
   mkdir /mnt/testdir
   xfs_io -c fsync /mnt/testdir # or fsync some file inside /mnt/testdir
   <power failure>

   The next time the fs is mounted the log replay procedure fails because
   it attempts to delete the snapshot entry (which has dir item key type
   of BTRFS_ROOT_ITEM_KEY) as if it were a regular (non-root) entry,
   resulting in the following error that causes mount to fail:

   [52174.510532] BTRFS info (device dm-0): failed to delete reference to snap, inode 257 parent 257
   [52174.512570] ------------[ cut here ]------------
   [52174.513278] WARNING: CPU: 12 PID: 28024 at fs/btrfs/inode.c:3986 __btrfs_unlink_inode+0x178/0x351 [btrfs]()
   [52174.514681] BTRFS: Transaction aborted (error -2)
   [52174.515630] Modules linked in: btrfs dm_flakey dm_mod overlay crc32c_generic ppdev xor raid6_pq acpi_cpufreq parport_pc tpm_tis sg parport tpm evdev i2c_piix4 proc
   [52174.521568] CPU: 12 PID: 28024 Comm: mount Tainted: G        W       4.5.0-rc6-btrfs-next-27+ #1
   [52174.522805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
   [52174.524053]  0000000000000000 ffff8801df2a7710 ffffffff81264e93 ffff8801df2a7758
   [52174.524053]  0000000000000009 ffff8801df2a7748 ffffffff81051618 ffffffffa03591cd
   [52174.524053]  00000000fffffffe ffff88015e6e5000 ffff88016dbc3c88 ffff88016dbc3c88
   [52174.524053] Call Trace:
   [52174.524053]  [<ffffffff81264e93>] dump_stack+0x67/0x90
   [52174.524053]  [<ffffffff81051618>] warn_slowpath_common+0x99/0xb2
   [52174.524053]  [<ffffffffa03591cd>] ? __btrfs_unlink_inode+0x178/0x351 [btrfs]
   [52174.524053]  [<ffffffff81051679>] warn_slowpath_fmt+0x48/0x50
   [52174.524053]  [<ffffffffa03591cd>] __btrfs_unlink_inode+0x178/0x351 [btrfs]
   [52174.524053]  [<ffffffff8118f5e9>] ? iput+0xb0/0x284
   [52174.524053]  [<ffffffffa0359fe8>] btrfs_unlink_inode+0x1c/0x3d [btrfs]
   [52174.524053]  [<ffffffffa038631e>] check_item_in_log+0x1fe/0x29b [btrfs]
   [52174.524053]  [<ffffffffa0386522>] replay_dir_deletes+0x167/0x1cf [btrfs]
   [52174.524053]  [<ffffffffa038739e>] fixup_inode_link_count+0x289/0x2aa [btrfs]
   [52174.524053]  [<ffffffffa038748a>] fixup_inode_link_counts+0xcb/0x105 [btrfs]
   [52174.524053]  [<ffffffffa038a5ec>] btrfs_recover_log_trees+0x258/0x32c [btrfs]
   [52174.524053]  [<ffffffffa03885b2>] ? replay_one_extent+0x511/0x511 [btrfs]
   [52174.524053]  [<ffffffffa034f288>] open_ctree+0x1dd4/0x21b9 [btrfs]
   [52174.524053]  [<ffffffffa032b753>] btrfs_mount+0x97e/0xaed [btrfs]
   [52174.524053]  [<ffffffff8108e1b7>] ? trace_hardirqs_on+0xd/0xf
   [52174.524053]  [<ffffffff8117bafa>] mount_fs+0x67/0x131
   [52174.524053]  [<ffffffff81193003>] vfs_kern_mount+0x6c/0xde
   [52174.524053]  [<ffffffffa032af81>] btrfs_mount+0x1ac/0xaed [btrfs]
   [52174.524053]  [<ffffffff8108e1b7>] ? trace_hardirqs_on+0xd/0xf
   [52174.524053]  [<ffffffff8108c262>] ? lockdep_init_map+0xb9/0x1b3
   [52174.524053]  [<ffffffff8117bafa>] mount_fs+0x67/0x131
   [52174.524053]  [<ffffffff81193003>] vfs_kern_mount+0x6c/0xde
   [52174.524053]  [<ffffffff8119590f>] do_mount+0x8a6/0x9e8
   [52174.524053]  [<ffffffff811358dd>] ? strndup_user+0x3f/0x59
   [52174.524053]  [<ffffffff81195c65>] SyS_mount+0x77/0x9f
   [52174.524053]  [<ffffffff814935d7>] entry_SYSCALL_64_fastpath+0x12/0x6b
   [52174.561288] ---[ end trace 6b53049efb1a3ea6 ]---

Fix this by forcing a transaction commit when such cases happen.
This means we check in the commit root of the subvolume tree if there
was any other inode with the same reference when the inode we are
fsync'ing is a new inode (created in the current transaction).

Test cases for fstests, covering all the scenarios given above, were
submitted upstream for fstests:

  * fstests: generic test for fsync after renaming directory
    https://patchwork.kernel.org/patch/8694281/

  * fstests: generic test for fsync after renaming file
    https://patchwork.kernel.org/patch/8694301/

  * fstests: add btrfs test for fsync after snapshot deletion
    https://patchwork.kernel.org/patch/8670671/

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 137 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9d2f..0e044d7ee721 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4406,6 +4406,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+/*
+ * When we are logging a new inode X, check if it doesn't have a reference that
+ * matches the reference from some other inode Y created in a past transaction
+ * and that was renamed in the current transaction. If we don't do this, then at
+ * log replay time we can lose inode Y (and all its files if it's a directory):
+ *
+ * mkdir /mnt/x
+ * echo "hello world" > /mnt/x/foobar
+ * sync
+ * mv /mnt/x /mnt/y
+ * mkdir /mnt/x                 # or touch /mnt/x
+ * xfs_io -c fsync /mnt/x
+ * <power fail>
+ * mount fs, trigger log replay
+ *
+ * After the log replay procedure, we would lose the first directory and all its
+ * files (file foobar).
+ * For the case where inode Y is not a directory we simply end up losing it:
+ *
+ * echo "123" > /mnt/foo
+ * sync
+ * mv /mnt/foo /mnt/bar
+ * echo "abc" > /mnt/foo
+ * xfs_io -c fsync /mnt/foo
+ * <power fail>
+ *
+ * We also need this for cases where a snapshot entry is replaced by some other
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
+ * if it were a regular entry:
+ *
+ * mkdir /mnt/x
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
+ * btrfs subvolume delete /mnt/x/snap
+ * rmdir /mnt/x
+ * mkdir /mnt/x
+ * fsync /mnt/x or fsync some new file inside it
+ * <power fail>
+ *
+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
+ * the same transaction.
+ */
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+					 const int slot,
+					 const struct btrfs_key *key,
+					 struct inode *inode)
+{
+	int ret;
+	struct btrfs_path *search_path;
+	char *name = NULL;
+	u32 name_len = 0;
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	u32 cur_offset = 0;
+	unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
+
+	search_path = btrfs_alloc_path();
+	if (!search_path)
+		return -ENOMEM;
+	search_path->search_commit_root = 1;
+	search_path->skip_locking = 1;
+
+	while (cur_offset < item_size) {
+		u64 parent;
+		u32 this_name_len;
+		u32 this_len;
+		unsigned long name_ptr;
+		struct btrfs_dir_item *di;
+
+		if (key->type == BTRFS_INODE_REF_KEY) {
+			struct btrfs_inode_ref *iref;
+
+			iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+			parent = key->offset;
+			this_name_len = btrfs_inode_ref_name_len(eb, iref);
+			name_ptr = (unsigned long)(iref + 1);
+			this_len = sizeof(*iref) + this_name_len;
+		} else {
+			struct btrfs_inode_extref *extref;
+
+			extref = (struct btrfs_inode_extref *)(ptr +
+							       cur_offset);
+			parent = btrfs_inode_extref_parent(eb, extref);
+			this_name_len = btrfs_inode_extref_name_len(eb, extref);
+			name_ptr = (unsigned long)&extref->name;
+			this_len = sizeof(*extref) + this_name_len;
+		}
+
+		if (this_name_len > name_len) {
+			char *new_name;
+
+			new_name = krealloc(name, this_name_len, GFP_NOFS);
+			if (!new_name) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			name_len = this_name_len;
+			name = new_name;
+		}
+
+		read_extent_buffer(eb, name, name_ptr, this_name_len);
+		di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
+					   search_path, parent,
+					   name, this_name_len, 0);
+		if (di && !IS_ERR(di)) {
+			ret = 1;
+			goto out;
+		} else if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		}
+		btrfs_release_path(search_path);
+
+		cur_offset += this_len;
+	}
+	ret = 0;
+out:
+	btrfs_free_path(search_path);
+	kfree(name);
+	return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4578,6 +4699,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		if (min_key.type == BTRFS_INODE_ITEM_KEY)
 			need_log_inode_item = false;
 
+		if ((min_key.type == BTRFS_INODE_REF_KEY ||
+		     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
+		    BTRFS_I(inode)->generation == trans->transid) {
+			ret = btrfs_check_ref_name_override(path->nodes[0],
+							    path->slots[0],
+							    &min_key, inode);
+			if (ret < 0) {
+				err = ret;
+				goto out_unlock;
+			} else if (ret > 0) {
+				err = 1;
+				btrfs_set_log_full_commit(root->fs_info, trans);
+				goto out_unlock;
+			}
+		}
+
 		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
 		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
 			if (ins_nr == 0)

From f6dffe77180ba8ac38e94247cf2a323614f2e876 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Mon, 8 Feb 2016 14:49:24 -0800
Subject: [PATCH 427/797] arm64: replace read_lock to rcu lock in
 call_step_hook

commit cf0a25436f05753aca5151891aea4fd130556e2a upstream.

BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917
in_atomic(): 1, irqs_disabled(): 128, pid: 383, name: sh
Preemption disabled at:[<ffff800000124c18>] kgdb_cpu_enter+0x158/0x6b8

CPU: 3 PID: 383 Comm: sh Tainted: G        W       4.1.13-rt13 #2
Hardware name: Freescale Layerscape 2085a RDB Board (DT)
Call trace:
[<ffff8000000885e8>] dump_backtrace+0x0/0x128
[<ffff800000088734>] show_stack+0x24/0x30
[<ffff80000079a7c4>] dump_stack+0x80/0xa0
[<ffff8000000bd324>] ___might_sleep+0x18c/0x1a0
[<ffff8000007a20ac>] __rt_spin_lock+0x2c/0x40
[<ffff8000007a2268>] rt_read_lock+0x40/0x58
[<ffff800000085328>] single_step_handler+0x38/0xd8
[<ffff800000082368>] do_debug_exception+0x58/0xb8
Exception stack(0xffff80834a1e7c80 to 0xffff80834a1e7da0)
7c80: ffffff9c ffffffff 92c23ba0 0000ffff 4a1e7e40 ffff8083 001bfcc4 ffff8000
7ca0: f2000400 00000000 00000000 00000000 4a1e7d80 ffff8083 0049501c ffff8000
7cc0: 00005402 00000000 00aaa210 ffff8000 4a1e7ea0 ffff8083 000833f4 ffff8000
7ce0: ffffff9c ffffffff 92c23ba0 0000ffff 4a1e7ea0 ffff8083 001bfcc0 ffff8000
7d00: 4a0fc400 ffff8083 00005402 00000000 4a1e7d40 ffff8083 00490324 ffff8000
7d20: ffffff9c 00000000 92c23ba0 0000ffff 000a0000 00000000 00000000 00000000
7d40: 00000008 00000000 00080000 00000000 92c23b8b 0000ffff 92c23b8e 0000ffff
7d60: 00000038 00000000 00001cb2 00000000 00000005 00000000 92d7b498 0000ffff
7d80: 01010101 01010101 92be9000 0000ffff 00000000 00000000 00000030 00000000
[<ffff8000000833f4>] el1_dbg+0x18/0x6c

This issue is similar to 62c6c61 ("arm64: replace read_lock to rcu lock in
call_break_hook"), but occurs in single_step_handler.

This also solves kgdbts boot test silent hang issue on 4.4 -rt kernel.

Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/debug-monitors.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 8aee3aeec3e6..c1492ba1f6d1 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
 
 /* EL1 Single Step Handler hooks */
 static LIST_HEAD(step_hook);
-static DEFINE_RWLOCK(step_hook_lock);
+static DEFINE_SPINLOCK(step_hook_lock);
 
 void register_step_hook(struct step_hook *hook)
 {
-	write_lock(&step_hook_lock);
-	list_add(&hook->node, &step_hook);
-	write_unlock(&step_hook_lock);
+	spin_lock(&step_hook_lock);
+	list_add_rcu(&hook->node, &step_hook);
+	spin_unlock(&step_hook_lock);
 }
 
 void unregister_step_hook(struct step_hook *hook)
 {
-	write_lock(&step_hook_lock);
-	list_del(&hook->node);
-	write_unlock(&step_hook_lock);
+	spin_lock(&step_hook_lock);
+	list_del_rcu(&hook->node);
+	spin_unlock(&step_hook_lock);
+	synchronize_rcu();
 }
 
 /*
@@ -213,15 +214,15 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
 	struct step_hook *hook;
 	int retval = DBG_HOOK_ERROR;
 
-	read_lock(&step_hook_lock);
+	rcu_read_lock();
 
-	list_for_each_entry(hook, &step_hook, node)	{
+	list_for_each_entry_rcu(hook, &step_hook, node)	{
 		retval = hook->fn(regs, esr);
 		if (retval == DBG_HOOK_HANDLED)
 			break;
 	}
 
-	read_unlock(&step_hook_lock);
+	rcu_read_unlock();
 
 	return retval;
 }

From 3c1a5d344e9721bed684382aab375ca5a143ef92 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 24 Feb 2016 18:45:41 +0100
Subject: [PATCH 428/797] perf: Do not double free

commit 130056275ade730e7a79c110212c8815202773ee upstream.

In case of: err_file: fput(event_file), we'll end up calling
perf_release() which in turn will free the event.

Do not then free the event _again_.

Tested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dvyukov@google.com
Cc: eranian@google.com
Cc: oleg@redhat.com
Cc: panand@redhat.com
Cc: sasha.levin@oracle.com
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/20160224174947.697350349@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index faf2067fc8e2..060c66ea61b6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8583,7 +8583,12 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
 err_alloc:
-	free_event(event);
+	/*
+	 * If event_file is set, the fput() above will have called ->release()
+	 * and that will take care of freeing the event.
+	 */
+	if (!event_file)
+		free_event(event);
 err_cpus:
 	put_online_cpus();
 err_task:

From 695ca6389e0949e44ce1bdbcd422b37e5eb38f4c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 24 Feb 2016 18:45:46 +0100
Subject: [PATCH 429/797] perf: Cure event->pending_disable race

commit 28a967c3a2f99fa3b5f762f25cb2a319d933571b upstream.

Because event_sched_out() checks event->pending_disable _before_
actually disabling the event, it can happen that the event fires after
it checks but before it gets disabled.

This would leave event->pending_disable set and the queued irq_work
will try and process it.

However, if the event trigger was during schedule(), the event might
have been de-scheduled by the time the irq_work runs, and
perf_event_disable_local() will fail.

Fix this by checking event->pending_disable _after_ we call
event->pmu->del(). This depends on the latter being a compiler
barrier, such that the compiler does not lift the load and re-create
the problem.

Tested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dvyukov@google.com
Cc: eranian@google.com
Cc: oleg@redhat.com
Cc: panand@redhat.com
Cc: sasha.levin@oracle.com
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/20160224174948.040469884@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 060c66ea61b6..1e889a078dbc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1580,14 +1580,14 @@ event_sched_out(struct perf_event *event,
 
 	perf_pmu_disable(event->pmu);
 
+	event->tstamp_stopped = tstamp;
+	event->pmu->del(event, 0);
+	event->oncpu = -1;
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	if (event->pending_disable) {
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-	event->tstamp_stopped = tstamp;
-	event->pmu->del(event, 0);
-	event->oncpu = -1;
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;

From e16b94ab4ccd5b31fa160978c601206a169de2bc Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 4 Apr 2016 12:40:37 +0300
Subject: [PATCH 430/797] mmc: sdhci-pci: Add support and PCI IDs for more
 Broxton host controllers

commit 01d6b2a40a0fa73c90e05b1033f181a51fec9292 upstream.

Add support and PCI IDs for more Broxton host controllers

Other BXT IDs were added in v4.4 so cc'ing stable. This patch
is dependent on commit 163cbe31e516 ("mmc: sdhci-pci: Fix card
detect race for Intel BXT/APL") but that is already in stable
since v4.4.4.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci-pci-core.c | 25 +++++++++++++++++++++++++
 drivers/mmc/host/sdhci-pci.h      |  3 +++
 2 files changed, 28 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 45ee07d3a761..610154836d79 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -390,6 +390,7 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
 	slot->cd_idx = 0;
 	slot->cd_override_level = true;
 	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD ||
+	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXTM_SD ||
 	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD)
 		slot->host->mmc_host_ops.get_cd = bxt_get_cd;
 
@@ -1171,6 +1172,30 @@ static const struct pci_device_id pci_ids[] = {
 		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_sd,
 	},
 
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BXTM_EMMC,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_emmc,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BXTM_SDIO,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_sdio,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BXTM_SD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_sd,
+	},
+
 	{
 		.vendor		= PCI_VENDOR_ID_INTEL,
 		.device		= PCI_DEVICE_ID_INTEL_APL_EMMC,
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index d1a0b4db60db..89e7151684a1 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -28,6 +28,9 @@
 #define PCI_DEVICE_ID_INTEL_BXT_SD	0x0aca
 #define PCI_DEVICE_ID_INTEL_BXT_EMMC	0x0acc
 #define PCI_DEVICE_ID_INTEL_BXT_SDIO	0x0ad0
+#define PCI_DEVICE_ID_INTEL_BXTM_SD	0x1aca
+#define PCI_DEVICE_ID_INTEL_BXTM_EMMC	0x1acc
+#define PCI_DEVICE_ID_INTEL_BXTM_SDIO	0x1ad0
 #define PCI_DEVICE_ID_INTEL_APL_SD	0x5aca
 #define PCI_DEVICE_ID_INTEL_APL_EMMC	0x5acc
 #define PCI_DEVICE_ID_INTEL_APL_SDIO	0x5ad0

From 39bd2591a80eb0ccd291a5a97456e667083bbcd0 Mon Sep 17 00:00:00 2001
From: David Henningsson <david.henningsson@canonical.com>
Date: Thu, 25 Feb 2016 09:37:05 +0100
Subject: [PATCH 431/797] ALSA: hda - Fixup speaker pass-through control for
 nid 0x14 on ALC225

commit 2ae955774f29bbd7d16149cb0ae8d0319bf2ecc4 upstream.

On one of the machines we enable, we found that the actual speaker volume
did not always correspond to the volume set in alsamixer. This patch
fixes that problem.

This patch was originally written by Kailang @ Realtek, I've rebased it
to fit sound git master.

BugLink: https://bugs.launchpad.net/bugs/1549660
Co-Authored-By: Kailang <kailang@realtek.com>
Signed-off-by: David Henningsson <david.henningsson@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3671eb89dd28..c709efc68a0c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3801,6 +3801,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
 
 static void alc_headset_mode_default(struct hda_codec *codec)
 {
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+		{}
+	};
 	static struct coef_fw coef0255[] = {
 		WRITE_COEF(0x45, 0xc089),
 		WRITE_COEF(0x45, 0xc489),
@@ -3842,6 +3846,9 @@ static void alc_headset_mode_default(struct hda_codec *codec)
 	};
 
 	switch (codec->core.vendor_id) {
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		break;
 	case 0x10ec0255:
 	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
@@ -4750,6 +4757,7 @@ enum {
 	ALC293_FIXUP_LENOVO_SPK_NOISE,
 	ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
 	ALC255_FIXUP_DELL_SPK_NOISE,
+	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5375,6 +5383,17 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
 	},
+	[ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* Disable pass-through path for FRONT 14h */
+			{ 0x20, AC_VERB_SET_COEF_INDEX, 0x36 },
+			{ 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 },
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5648,10 +5667,10 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{0x21, 0x03211020}
 
 static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
-	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC225_STANDARD_PINS,
 		{0x14, 0x901701a0}),
-	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC225_STANDARD_PINS,
 		{0x14, 0x901701b0}),
 	SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,

From 478c9f35a987a879189afedc1de1c6603487791a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 25 Feb 2016 14:31:59 +0100
Subject: [PATCH 432/797] ALSA: hda - Fix headset support and noise on HP
 EliteBook 755 G2

commit f883982dc1b117f04579f0896821cd9f2e397f94 upstream.

HP EliteBook 755 G2 with ALC3228 (ALC280) codec [103c:221c] requires
the known fixup (ALC269_FIXUP_HEADSET_MIC) for making the headset mic
working.  Also, it suffers from the loopback noise problem, so we
should disable aamix path as well.

Reported-by: Derick Eddington <derick.eddington@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index c709efc68a0c..73978c79981f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4758,6 +4758,7 @@ enum {
 	ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
 	ALC255_FIXUP_DELL_SPK_NOISE,
 	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+	ALC280_FIXUP_HP_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5394,6 +5395,12 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
 	},
+	[ALC280_FIXUP_HP_HEADSET_MIC] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MIC,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5498,6 +5505,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),

From c362f778f725a8d606e91403e830bc9d7fa10f57 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 1 Apr 2016 11:00:15 +0800
Subject: [PATCH 433/797] ALSA: hda - fix front mic problem for a HP desktop

commit e549d190f7b5f94e9ab36bd965028112914d010d upstream.

The front mic jack (pink color) can't detect any plug or unplug. After
applying this fix, both detecting function and recording function
work well.

BugLink: https://bugs.launchpad.net/bugs/1564712
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 73978c79981f..fefe83f2beab 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4759,6 +4759,7 @@ enum {
 	ALC255_FIXUP_DELL_SPK_NOISE,
 	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC280_FIXUP_HP_HEADSET_MIC,
+	ALC221_FIXUP_HP_FRONT_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5401,6 +5402,13 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HEADSET_MIC,
 	},
+	[ALC221_FIXUP_HP_FRONT_MIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x02a19020 }, /* Front Mic */
+			{ }
+		},
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5506,6 +5514,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),

From f0cf5ccef6fc0390aa42987e822dd63732d4d2da Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 11 Apr 2016 16:55:26 +0200
Subject: [PATCH 434/797] ALSA: hda/realtek - Enable the ALC292 dock fixup on
 the Thinkpad T460s

commit c636b95ec5980345674ad7960a3c67135a84b687 upstream.

The Lenovo Thinkpad T460s requires the alc_fixup_tpt440_dock as well in
order to get working sound output on the docking station's headphone jack.

Patch tested on a Thinkpad T460s (20F9CT01WW) using a ThinkPad Ultradock
on kernel 4.4.6.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Tested-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index fefe83f2beab..1402ba954b3d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4760,6 +4760,7 @@ enum {
 	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC280_FIXUP_HP_HEADSET_MIC,
 	ALC221_FIXUP_HP_FRONT_MIC,
+	ALC292_FIXUP_TPT460,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5409,6 +5410,12 @@ static const struct hda_fixup alc269_fixups[] = {
 			{ }
 		},
 	},
+	[ALC292_FIXUP_TPT460] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_tpt440_dock,
+		.chained = true,
+		.chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5563,7 +5570,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
-	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
+	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
@@ -5658,6 +5665,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"},
 	{.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
 	{.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
+	{.id = ALC292_FIXUP_TPT460, .name = "tpt460"},
 	{}
 };
 #define ALC225_STANDARD_PINS \

From 77ffc8a9e9b36c2311ee1443a9dc5f1a5cf534d3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 4 Apr 2016 11:47:50 +0200
Subject: [PATCH 435/797] ALSA: usb-audio: Add a sample rate quirk for Phoenix
 Audio TMX320

commit f03b24a851d32ca85dacab01785b24a7ee717d37 upstream.

Phoenix Audio TMX320 gives a similar error when the sample rate is
queried:
  usb 2-1.3: 2:1: cannot get freq at ep 0x85
  usb 2-1.3: 1:1: cannot get freq at ep 0x2
  ....

Add the corresponding USB-device ID (1de7:0014) to
snd_usb_get_sample_rate_quirk() list.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110221
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index cd7eac28edee..4f2dedfa7645 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+	case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
 	case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
 		return true;
 	}

From 157fb2daebc0c42901432b8028488c5ebb4509b1 Mon Sep 17 00:00:00 2001
From: Dennis Kadioglu <denk@post.com>
Date: Wed, 6 Apr 2016 08:39:01 +0200
Subject: [PATCH 436/797] ALSA: usb-audio: Add a quirk for Plantronics BT300

commit b4203ff5464da00b7812e7b480192745b0d66bbf upstream.

Plantronics BT300 does not support reading the sample rate which leads
to many lines of "cannot get freq at ep 0x1". This patch adds the USB
ID of the BT300 to quirks.c and avoids those error messages.

Signed-off-by: Dennis Kadioglu <denk@post.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4f2dedfa7645..001fb4dc0722 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
+	case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */

From d622dad32621a1a33ba74b1b29a19cfd5abe069f Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Tue, 12 Apr 2016 10:55:03 +0800
Subject: [PATCH 437/797] ALSA: usb-audio: Skip volume controls triggers hangup
 on Dell USB Dock

commit adcdd0d5a1cb779f6d455ae70882c19c527627a8 upstream.

This is a workaround for Dell USB dock audio.
It fixes the master volume staying low.

[Some background: the patch essentially skips the controls of a couple
 of FU volumes.  Although the firmware exposes the dB and the value
 information via the usb descriptor, changing the values (we set the
 min volume as default) screws up the device.  Although this has been
 fixed in the newer firmware, the devices are shipped with the old
 firmware, thus we need the workaround in the driver side.  -- tiwai]

Signed-off-by: Kailang Yang <kailang@realtek.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/mixer_maps.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index ddca6547399b..1f8fb0d904e0 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -348,6 +348,16 @@ static struct usbmix_name_map bose_companion5_map[] = {
 	{ 0 }	/* terminator */
 };
 
+/*
+ * Dell usb dock with ALC4020 codec had a firmware problem where it got
+ * screwed up when zero volume is passed; just skip it as a workaround
+ */
+static const struct usbmix_name_map dell_alc4020_map[] = {
+	{ 16, NULL },
+	{ 19, NULL },
+	{ 0 }
+};
+
 /*
  * Control map entries
  */
@@ -430,6 +440,10 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = {
 		.id = USB_ID(0x0ccd, 0x0028),
 		.map = aureon_51_2_map,
 	},
+	{
+		.id = USB_ID(0x0bda, 0x4014),
+		.map = dell_alc4020_map,
+	},
 	{
 		.id = USB_ID(0x0dba, 0x1000),
 		.map = mbox1_map,

From f174a1fd2c0e577646a1263f7c7d01e588ca28bd Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 25 Mar 2016 15:26:55 +0100
Subject: [PATCH 438/797] HID: wacom: fix Bamboo ONE oops

commit 580549ef6b3e3fb3b958de490ca99f43a089a2cf upstream.

Looks like recent changes in the Wacom driver made the Bamboo ONE crashes.
The tablet behaves as if it was a regular Bamboo device with pen, touch
and pad, but there is no physical pad connected to it.
The weird part is that the pad is still sending events and given that
there is no input node connected to it, we get  anull pointer exception.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1317116

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/wacom_wac.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 01a4f05c1642..3c0f47ac8e53 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2492,6 +2492,17 @@ void wacom_setup_device_quirks(struct wacom *wacom)
 		}
 	}
 
+	/*
+	 * Hack for the Bamboo One:
+	 * the device presents a PAD/Touch interface as most Bamboos and even
+	 * sends ghosts PAD data on it. However, later, we must disable this
+	 * ghost interface, and we can not detect it unless we set it here
+	 * to WACOM_DEVICETYPE_PAD or WACOM_DEVICETYPE_TOUCH.
+	 */
+	if (features->type == BAMBOO_PEN &&
+	    features->pktlen == WACOM_PKGLEN_BBTOUCH3)
+		features->device_type |= WACOM_DEVICETYPE_PAD;
+
 	/*
 	 * Raw Wacom-mode pen and touch events both come from interface
 	 * 0, whose HID descriptor has an application usage of 0xFF0D

From b66a7a3a0947ced3b8dff340e815b708cc4a98a7 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 23 Mar 2016 12:17:09 -0400
Subject: [PATCH 439/797] HID: usbhid: fix inconsistent
 reset/resume/reset-resume behavior

commit 972e6a993f278b416a8ee3ec65475724fc36feb2 upstream.

The usbhid driver has inconsistently duplicated code in its post-reset,
resume, and reset-resume pathways.

	reset-resume doesn't check HID_STARTED before trying to
	restart the I/O queues.

	resume fails to clear the HID_SUSPENDED flag if HID_STARTED
	isn't set.

	resume calls usbhid_restart_queues() with usbhid->lock held
	and the others call it without holding the lock.

The first item in particular causes a problem following a reset-resume
if the driver hasn't started up its I/O.  URB submission fails because
usbhid->urbin is NULL, and this triggers an unending reset-retry loop.

This patch fixes the problem by creating a new subroutine,
hid_restart_io(), to carry out all the common activities.  It also
adds some checks that were missing in the original code:

	After a reset, there's no need to clear any halted endpoints.

	After a resume, if a reset is pending there's no need to
	restart any I/O until the reset is finished.

	After a resume, if the interrupt-IN endpoint is halted there's
	no need to submit the input URB until the halt has been
	cleared.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Daniel Fraga <fragabr@gmail.com>
Tested-by: Daniel Fraga <fragabr@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/usbhid/hid-core.c | 73 ++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 5dd426fee8cc..0df32fe0e345 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -951,14 +951,6 @@ static int usbhid_output_report(struct hid_device *hid, __u8 *buf, size_t count)
 	return ret;
 }
 
-static void usbhid_restart_queues(struct usbhid_device *usbhid)
-{
-	if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl))
-		usbhid_restart_out_queue(usbhid);
-	if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl))
-		usbhid_restart_ctrl_queue(usbhid);
-}
-
 static void hid_free_buffers(struct usb_device *dev, struct hid_device *hid)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
@@ -1404,6 +1396,37 @@ static void hid_cease_io(struct usbhid_device *usbhid)
 	usb_kill_urb(usbhid->urbout);
 }
 
+static void hid_restart_io(struct hid_device *hid)
+{
+	struct usbhid_device *usbhid = hid->driver_data;
+	int clear_halt = test_bit(HID_CLEAR_HALT, &usbhid->iofl);
+	int reset_pending = test_bit(HID_RESET_PENDING, &usbhid->iofl);
+
+	spin_lock_irq(&usbhid->lock);
+	clear_bit(HID_SUSPENDED, &usbhid->iofl);
+	usbhid_mark_busy(usbhid);
+
+	if (clear_halt || reset_pending)
+		schedule_work(&usbhid->reset_work);
+	usbhid->retry_delay = 0;
+	spin_unlock_irq(&usbhid->lock);
+
+	if (reset_pending || !test_bit(HID_STARTED, &usbhid->iofl))
+		return;
+
+	if (!clear_halt) {
+		if (hid_start_in(hid) < 0)
+			hid_io_error(hid);
+	}
+
+	spin_lock_irq(&usbhid->lock);
+	if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl))
+		usbhid_restart_out_queue(usbhid);
+	if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl))
+		usbhid_restart_ctrl_queue(usbhid);
+	spin_unlock_irq(&usbhid->lock);
+}
+
 /* Treat USB reset pretty much the same as suspend/resume */
 static int hid_pre_reset(struct usb_interface *intf)
 {
@@ -1453,14 +1476,14 @@ static int hid_post_reset(struct usb_interface *intf)
 		return 1;
 	}
 
+	/* No need to do another reset or clear a halted endpoint */
 	spin_lock_irq(&usbhid->lock);
 	clear_bit(HID_RESET_PENDING, &usbhid->iofl);
+	clear_bit(HID_CLEAR_HALT, &usbhid->iofl);
 	spin_unlock_irq(&usbhid->lock);
 	hid_set_idle(dev, intf->cur_altsetting->desc.bInterfaceNumber, 0, 0);
-	status = hid_start_in(hid);
-	if (status < 0)
-		hid_io_error(hid);
-	usbhid_restart_queues(usbhid);
+
+	hid_restart_io(hid);
 
 	return 0;
 }
@@ -1483,25 +1506,9 @@ void usbhid_put_power(struct hid_device *hid)
 #ifdef CONFIG_PM
 static int hid_resume_common(struct hid_device *hid, bool driver_suspended)
 {
-	struct usbhid_device *usbhid = hid->driver_data;
-	int status;
-
-	spin_lock_irq(&usbhid->lock);
-	clear_bit(HID_SUSPENDED, &usbhid->iofl);
-	usbhid_mark_busy(usbhid);
-
-	if (test_bit(HID_CLEAR_HALT, &usbhid->iofl) ||
-			test_bit(HID_RESET_PENDING, &usbhid->iofl))
-		schedule_work(&usbhid->reset_work);
-	usbhid->retry_delay = 0;
-
-	usbhid_restart_queues(usbhid);
-	spin_unlock_irq(&usbhid->lock);
-
-	status = hid_start_in(hid);
-	if (status < 0)
-		hid_io_error(hid);
+	int status = 0;
 
+	hid_restart_io(hid);
 	if (driver_suspended && hid->driver && hid->driver->resume)
 		status = hid->driver->resume(hid);
 	return status;
@@ -1570,12 +1577,8 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 static int hid_resume(struct usb_interface *intf)
 {
 	struct hid_device *hid = usb_get_intfdata (intf);
-	struct usbhid_device *usbhid = hid->driver_data;
 	int status;
 
-	if (!test_bit(HID_STARTED, &usbhid->iofl))
-		return 0;
-
 	status = hid_resume_common(hid, true);
 	dev_dbg(&intf->dev, "resume status %d\n", status);
 	return 0;
@@ -1584,10 +1587,8 @@ static int hid_resume(struct usb_interface *intf)
 static int hid_reset_resume(struct usb_interface *intf)
 {
 	struct hid_device *hid = usb_get_intfdata(intf);
-	struct usbhid_device *usbhid = hid->driver_data;
 	int status;
 
-	clear_bit(HID_SUSPENDED, &usbhid->iofl);
 	status = hid_post_reset(intf);
 	if (status >= 0 && hid->driver && hid->driver->reset_resume) {
 		int ret = hid->driver->reset_resume(hid);

From fc9683f24bc4dce3ac67f78f8b393f3c4159319c Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 17 Feb 2016 12:26:33 -0600
Subject: [PATCH 440/797] Revert "x86/PCI: Don't alloc pcibios-irq when MSI is
 enabled"

commit fe25d078874f2c29c38f4160467d74f5756537c9 upstream.

Revert 8affb487d4a4 ("x86/PCI: Don't alloc pcibios-irq when MSI is
enabled").

This is part of reverting 991de2e59090 ("PCI, x86: Implement
pcibios_alloc_irq() and pcibios_free_irq()") to fix regressions it
introduced.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=111211
Fixes: 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
CC: Jiang Liu <jiang.liu@linux.intel.com>
CC: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/pci/common.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index eccd4d99e6a4..dc78a4a9a466 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -675,14 +675,6 @@ int pcibios_add_device(struct pci_dev *dev)
 
 int pcibios_alloc_irq(struct pci_dev *dev)
 {
-	/*
-	 * If the PCI device was already claimed by core code and has
-	 * MSI enabled, probing of the pcibios IRQ will overwrite
-	 * dev->irq.  So bail out if MSI is already enabled.
-	 */
-	if (pci_dev_msi_enabled(dev))
-		return -EBUSY;
-
 	return pcibios_enable_irq(dev);
 }
 

From 2d0d0011ff48f000ec789f9b7e3378886225ec68 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 17 Feb 2016 12:26:38 -0600
Subject: [PATCH 441/797] Revert "PCI: Add helpers to manage pci_dev->irq and
 pci_dev->irq_managed"

commit 67b4eab91caf2ad574cab1b17ae09180ea2e116e upstream.

Revert 811a4e6fce09 ("PCI: Add helpers to manage pci_dev->irq and
pci_dev->irq_managed").

This is part of reverting 991de2e59090 ("PCI, x86: Implement
pcibios_alloc_irq() and pcibios_free_irq()") to fix regressions it
introduced.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=111211
Fixes: 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
CC: Jiang Liu <jiang.liu@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/pci/intel_mid_pci.c |  4 ++--
 arch/x86/pci/irq.c           | 10 ++++++----
 drivers/acpi/pci_irq.c       | 10 ++++++----
 include/linux/pci.h          | 17 -----------------
 4 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 0d24e7c10145..8826ff593ebc 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,7 +215,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	int polarity;
 	int ret;
 
-	if (pci_has_managed_irq(dev))
+	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
 	switch (intel_mid_identify_cpu()) {
@@ -256,7 +256,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (pci_has_managed_irq(dev)) {
+	if (dev->irq_managed && dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
 		/*
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 32e70343e6fd..72108f0b66b1 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			struct pci_dev *temp_dev;
 			int irq;
 
-			if (pci_has_managed_irq(dev))
+			if (dev->irq_managed && dev->irq > 0)
 				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,7 +1230,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				pci_set_managed_irq(dev, irq);
+				dev->irq_managed = 1;
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
 				return 0;
@@ -1258,8 +1259,9 @@ static int pirq_enable_irq(struct pci_dev *dev)
 
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
+	if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
-		pci_reset_managed_irq(dev);
+		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index c9336751e5e3..172b74df0fa7 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -409,7 +409,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		return 0;
 	}
 
-	if (pci_has_managed_irq(dev))
+	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
 	entry = acpi_pci_irq_lookup(dev, pin);
@@ -454,7 +454,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		kfree(entry);
 		return rc;
 	}
-	pci_set_managed_irq(dev, rc);
+	dev->irq = rc;
+	dev->irq_managed = 1;
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -477,7 +478,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin || !pci_has_managed_irq(dev))
+	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
 	entry = acpi_pci_irq_lookup(dev, pin);
@@ -499,6 +500,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		pci_reset_managed_irq(dev);
+		dev->irq_managed = 0;
+		dev->irq = 0;
 	}
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4e554bfff129..e89c7ee7e803 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -989,23 +989,6 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 	return pdev->is_managed;
 }
 
-static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq)
-{
-	pdev->irq = irq;
-	pdev->irq_managed = 1;
-}
-
-static inline void pci_reset_managed_irq(struct pci_dev *pdev)
-{
-	pdev->irq = 0;
-	pdev->irq_managed = 0;
-}
-
-static inline bool pci_has_managed_irq(struct pci_dev *pdev)
-{
-	return pdev->irq_managed && pdev->irq > 0;
-}
-
 void pci_disable_device(struct pci_dev *dev);
 
 extern unsigned int pcibios_max_latency;

From c1491657c533307ac2f341e1b7ecdf156de3f647 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 17 Feb 2016 12:26:42 -0600
Subject: [PATCH 442/797] Revert "PCI, x86: Implement pcibios_alloc_irq() and
 pcibios_free_irq()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 6c777e8799a93e3bdb67bec622429e1b48dc90fb upstream.

991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and
pcibios_free_irq()") appeared in v4.3 and helps support IOAPIC hotplug.

Олег reported that the Elcus-1553 TA1-PCI driver worked in v4.2 but not
v4.3 and bisected it to 991de2e59090.  Sunjin reported that the RocketRAID
272x driver worked in v4.2 but not v4.3.  In both cases booting with
"pci=routirq" is a workaround.

I think the problem is that after 991de2e59090, we no longer call
pcibios_enable_irq() for upstream bridges.  Prior to 991de2e59090, when a
driver called pci_enable_device(), we recursively called
pcibios_enable_irq() for upstream bridges via pci_enable_bridge().

After 991de2e59090, we call pcibios_enable_irq() from pci_device_probe()
instead of the pci_enable_device() path, which does *not* call
pcibios_enable_irq() for upstream bridges.

Revert 991de2e59090 to fix these driver regressions.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=111211
Fixes: 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()")
Reported-and-tested-by: Олег Мороз <oleg.moroz@mcc.vniiem.ru>
Reported-by: Sunjin Yang <fan4326@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
CC: Jiang Liu <jiang.liu@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/pci_x86.h |  2 ++
 arch/x86/pci/common.c          | 26 ++++++++++++++------------
 arch/x86/pci/intel_mid_pci.c   |  7 ++-----
 arch/x86/pci/irq.c             | 15 ++++++++++++++-
 drivers/acpi/pci_irq.c         |  9 ++++++++-
 5 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..164e3f8d3c3d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index dc78a4a9a466..8fd6f44aee83 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -673,20 +673,22 @@ int pcibios_add_device(struct pci_dev *dev)
 	return 0;
 }
 
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
-	return pcibios_enable_irq(dev);
-}
-
-void pcibios_free_irq(struct pci_dev *dev)
-{
-	if (pcibios_disable_irq)
-		pcibios_disable_irq(dev);
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	return pci_enable_resources(dev, mask);
+	int err;
+
+	if ((err = pci_enable_resources(dev, mask)) < 0)
+		return err;
+
+	if (!pci_dev_msi_enabled(dev))
+		return pcibios_enable_irq(dev);
+	return 0;
+}
+
+void pcibios_disable_device (struct pci_dev *dev)
+{
+	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+		pcibios_disable_irq(dev);
 }
 
 int pci_ext_cfg_avail(void)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 8826ff593ebc..8b93e634af84 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -256,13 +256,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (dev->irq_managed && dev->irq > 0) {
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
-		/*
-		 * Don't reset dev->irq here, otherwise
-		 * intel_mid_pci_irq_enable() will fail on next call.
-		 */
 	}
 }
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 72108f0b66b1..9bd115484745 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1257,9 +1257,22 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) {
+	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
 		dev->irq = 0;
 		dev->irq_managed = 0;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 172b74df0fa7..8a10a7ae6a8a 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -481,6 +481,14 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
+	/* Keep IOAPIC pin configuration when suspending */
+	if (dev->dev.power.is_prepared)
+		return;
+#ifdef	CONFIG_PM
+	if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+		return;
+#endif
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry)
 		return;
@@ -501,6 +509,5 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
 		dev->irq_managed = 0;
-		dev->irq = 0;
 	}
 }

From 440e9a240ca22cbed85bca3d6950cd75e1349775 Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Thu, 21 Jan 2016 11:57:47 +0000
Subject: [PATCH 443/797] staging: android: ion: Set the length of the DMA sg
 entries in buffer

commit 70bc916b2c80913753fb188d4daee50a64d21ba0 upstream.

ion_buffer_create() will allocate a buffer and then create a DMA
mapping for it, but it forgot to set the length of the page entries.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
Acked-by: Laura Abbott <labbott@redhat.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ion/ion.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index e237e9f3312d..df560216d702 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -251,8 +251,10 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
 	 * memory coming from the heaps is ready for dma, ie if it has a
 	 * cached mapping that mapping has been invalidated
 	 */
-	for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i)
+	for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
 		sg_dma_address(sg) = sg_phys(sg);
+		sg_dma_len(sg) = sg->length;
+	}
 	mutex_lock(&dev->buffer_lock);
 	ion_buffer_add(dev, buffer);
 	mutex_unlock(&dev->buffer_lock);

From 0d8c1f17e86919fbc645ae9283304738476dd67c Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Mon, 16 Nov 2015 15:55:11 -0200
Subject: [PATCH 444/797] usbvision: fix crash on detecting device with invalid
 configuration

commit fa52bd506f274b7619955917abfde355e3d19ffe upstream.

The usbvision driver crashes when a specially crafted usb device with invalid
number of interfaces or endpoints is detected. This fix adds checks that the
device has proper configuration expected by the driver.

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/usbvision/usbvision-video.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c
index b693206f66dd..d1dc1a198e3e 100644
--- a/drivers/media/usb/usbvision/usbvision-video.c
+++ b/drivers/media/usb/usbvision/usbvision-video.c
@@ -1463,9 +1463,23 @@ static int usbvision_probe(struct usb_interface *intf,
 
 	if (usbvision_device_data[model].interface >= 0)
 		interface = &dev->actconfig->interface[usbvision_device_data[model].interface]->altsetting[0];
-	else
+	else if (ifnum < dev->actconfig->desc.bNumInterfaces)
 		interface = &dev->actconfig->interface[ifnum]->altsetting[0];
+	else {
+		dev_err(&intf->dev, "interface %d is invalid, max is %d\n",
+		    ifnum, dev->actconfig->desc.bNumInterfaces - 1);
+		ret = -ENODEV;
+		goto err_usb;
+	}
+
+	if (interface->desc.bNumEndpoints < 2) {
+		dev_err(&intf->dev, "interface %d has %d endpoints, but must"
+		    " have minimum 2\n", ifnum, interface->desc.bNumEndpoints);
+		ret = -ENODEV;
+		goto err_usb;
+	}
 	endpoint = &interface->endpoint[1].desc;
+
 	if (!usb_endpoint_xfer_isoc(endpoint)) {
 		dev_err(&intf->dev, "%s: interface %d. has non-ISO endpoint!\n",
 		    __func__, ifnum);

From 4b59a38da5983852008270e81140f611df6f0bfd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 20 Feb 2016 14:19:34 -0800
Subject: [PATCH 445/797] Revert "usb: hub: do not clear BOS field during reset
 device"

commit e5bdfd50d6f76077bf8441d130c606229e100d40 upstream.

This reverts commit d8f00cd685f5c8e0def8593e520a7fef12c22407.

Tony writes:

This upstream commit is causing an oops:
d8f00cd685f5 ("usb: hub: do not clear BOS field during reset device")

This patch has already been included in several -stable kernels.  Here
are the affected kernels:
4.5.0-rc4 (current git)
4.4.2
4.3.6 (currently in review)
4.1.18
3.18.27
3.14.61

How to reproduce the problem:
Boot kernel with slub debugging enabled (otherwise memory corruption
will cause random oopses later instead of immediately)
Plug in USB 3.0 disk to xhci USB 3.0 port
dd if=/dev/sdc of=/dev/null bs=65536
(where /dev/sdc is the USB 3.0 disk)
Unplug USB cable while dd is still going
Oops is immediate:

Reported-by: Tony Battersby <tonyb@cybernetics.com>
Cc: Du, Changbin <changbin.du@intel.com>
Cc: Roger Quadros <rogerq@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 2a274884c7ea..84df093639ac 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5392,6 +5392,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 	}
 
 	bos = udev->bos;
+	udev->bos = NULL;
 
 	for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
@@ -5484,11 +5485,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 	usb_set_usb2_hardware_lpm(udev, 1);
 	usb_unlocked_enable_lpm(udev);
 	usb_enable_ltm(udev);
-	/* release the new BOS descriptor allocated  by hub_port_init() */
-	if (udev->bos != bos) {
-		usb_release_bos_descriptor(udev);
-		udev->bos = bos;
-	}
+	usb_release_bos_descriptor(udev);
+	udev->bos = bos;
 	return 0;
 
 re_enumerate:

From 8c9aef03d3b540b6885e7534a885ea25f62dd9ed Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 20 Apr 2016 15:44:02 +0900
Subject: [PATCH 446/797] Linux 4.4.8

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5a493e785aca..1928fcd539cc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 7
+SUBLEVEL = 8
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 5b616a05de88d4be0136156a26fae9da855939f6 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 30 Mar 2016 08:46:31 +0800
Subject: [PATCH 447/797] block: partition: initialize percpuref before sending
 out KOBJ_ADD

commit b30a337ca27c4f40439e4bfb290cba5f88d73bb7 upstream.

The initialization of partition's percpu_ref should have been done before
sending out KOBJ_ADD uevent, which may cause userspace to read partition
table. So the uninitialized percpu_ref may be accessed in data path.

This patch fixes this issue reported by Naveen.

Reported-by: Naveen Kaje <nkaje@codeaurora.org>
Tested-by: Naveen Kaje <nkaje@codeaurora.org>
Fixes: 6c71013ecb7e2(block: partition: convert percpu ref)
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/partition-generic.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/block/partition-generic.c b/block/partition-generic.c
index 746935a5973c..a241e3900bc9 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -349,15 +349,20 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 			goto out_del;
 	}
 
+	err = hd_ref_init(p);
+	if (err) {
+		if (flags & ADDPART_FLAG_WHOLEDISK)
+			goto out_remove_file;
+		goto out_del;
+	}
+
 	/* everything is up and running, commence */
 	rcu_assign_pointer(ptbl->part[partno], p);
 
 	/* suppress uevent if the disk suppresses it */
 	if (!dev_get_uevent_suppress(ddev))
 		kobject_uevent(&pdev->kobj, KOBJ_ADD);
-
-	if (!hd_ref_init(p))
-		return p;
+	return p;
 
 out_free_info:
 	free_part_info(p);
@@ -366,6 +371,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 out_free:
 	kfree(p);
 	return ERR_PTR(err);
+out_remove_file:
+	device_remove_file(pdev, &dev_attr_whole_disk);
 out_del:
 	kobject_put(p->holder_dir);
 	device_del(pdev);

From 9fed24fe30c1217c640d2b38403034c2c7fdce12 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 15 Apr 2016 18:51:28 +0800
Subject: [PATCH 448/797] block: loop: fix filesystem corruption in case of
 aio/dio

commit a7297a6a3a3322b054592e8e988981d2f5f29cc4 upstream.

Starting from commit e36f620428(block: split bios to max possible length),
block core starts to split bio in the middle of bvec.

Unfortunately loop dio/aio doesn't consider this situation, and
always treats 'iter.iov_offset' as zero. Then filesystem corruption
is observed.

This patch figures out the offset of the base bvec via
'bio->bi_iter.bi_bvec_done' and fixes the issue by passing the offset
to iov iterator.

Fixes: e36f6204288088f (block: split bios to max possible length)
Cc: Keith Busch <keith.busch@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/loop.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 423f4ca7d712..80cf8add46ff 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -488,6 +488,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
 	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
 		      bio_segments(bio), blk_rq_bytes(cmd->rq));
+	/*
+	 * This bio may be started from the middle of the 'bvec'
+	 * because of bio splitting, so offset from the bvec must
+	 * be passed to iov iterator
+	 */
+	iter.iov_offset = bio->bi_iter.bi_bvec_done;
 
 	cmd->iocb.ki_pos = pos;
 	cmd->iocb.ki_filp = file;

From adbe236b953f4537f9e5ce86d1c7ace613dec38c Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 6 Apr 2016 10:05:16 +0200
Subject: [PATCH 449/797] x86/mce: Avoid using object after free in genpool

commit a3125494cff084b098c80bb36fbe2061ffed9d52 upstream.

When we loop over all queued machine check error records to pass them
to the registered notifiers we use llist_for_each_entry(). But the loop
calls gen_pool_free() for the entry in the body of the loop - and then
the iterator looks at node->next after the free.

Use llist_for_each_entry_safe() instead.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Gong Chen <gong.chen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/0205920@agluck-desk.sc.intel.com
Link: http://lkml.kernel.org/r/1459929916-12852-4-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce-genpool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 0a850100c594..2658e2af74ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ];
 void mce_gen_pool_process(void)
 {
 	struct llist_node *head;
-	struct mce_evt_llist *node;
+	struct mce_evt_llist *node, *tmp;
 	struct mce *mce;
 
 	head = llist_del_all(&mce_event_llist);
@@ -37,7 +37,7 @@ void mce_gen_pool_process(void)
 		return;
 
 	head = llist_reverse_order(head);
-	llist_for_each_entry(node, head, llnode) {
+	llist_for_each_entry_safe(node, tmp, head, llnode) {
 		mce = &node->mce;
 		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
 		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));

From 1c8497d2035d95e4e26bdf6cec34150bfb972776 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 30 Mar 2016 12:24:47 -0700
Subject: [PATCH 450/797] kvm: x86: do not leak guest xcr0 into host interrupt
 handlers

commit fc5b7f3bf1e1414bd4e91db6918c85ace0c873a5 upstream.

An interrupt handler that uses the fpu can kill a KVM VM, if it runs
under the following conditions:
 - the guest's xcr0 register is loaded on the cpu
 - the guest's fpu context is not loaded
 - the host is using eagerfpu

Note that the guest's xcr0 register and fpu context are not loaded as
part of the atomic world switch into "guest mode". They are loaded by
KVM while the cpu is still in "host mode".

Usage of the fpu in interrupt context is gated by irq_fpu_usable(). The
interrupt handler will look something like this:

if (irq_fpu_usable()) {
        kernel_fpu_begin();

        [... code that uses the fpu ...]

        kernel_fpu_end();
}

As long as the guest's fpu is not loaded and the host is using eager
fpu, irq_fpu_usable() returns true (interrupted_kernel_fpu_idle()
returns true). The interrupt handler proceeds to use the fpu with
the guest's xcr0 live.

kernel_fpu_begin() saves the current fpu context. If this uses
XSAVE[OPT], it may leave the xsave area in an undesirable state.
According to the SDM, during XSAVE bit i of XSTATE_BV is not modified
if bit i is 0 in xcr0. So it's possible that XSTATE_BV[i] == 1 and
xcr0[i] == 0 following an XSAVE.

kernel_fpu_end() restores the fpu context. Now if any bit i in
XSTATE_BV == 1 while xcr0[i] == 0, XRSTOR generates a #GP. The
fault is trapped and SIGSEGV is delivered to the current process.

Only pre-4.2 kernels appear to be vulnerable to this sequence of
events. Commit 653f52c ("kvm,x86: load guest FPU context more eagerly")
from 4.2 forces the guest's fpu to always be loaded on eagerfpu hosts.

This patch fixes the bug by keeping the host's xcr0 loaded outside
of the interrupts-disabled region where KVM switches into guest mode.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: David Matlack <dmatlack@google.com>
[Move load after goto cancel_injection. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7eb4ebd3ebea..605cea75eb0d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -697,7 +697,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
 			return 1;
 	}
-	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
 	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -6495,8 +6494,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
-	kvm_load_guest_xcr0(vcpu);
-
 	vcpu->mode = IN_GUEST_MODE;
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6519,6 +6516,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		goto cancel_injection;
 	}
 
+	kvm_load_guest_xcr0(vcpu);
+
 	if (req_immediate_exit)
 		smp_send_reschedule(vcpu->cpu);
 
@@ -6568,6 +6567,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
+	kvm_put_guest_xcr0(vcpu);
+
 	/* Interrupt is enabled by handle_external_intr() */
 	kvm_x86_ops->handle_external_intr(vcpu);
 
@@ -7215,7 +7216,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 * and assume host would use all available bits.
 	 * Guest xcr0 would be loaded later.
 	 */
-	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
 	__kernel_fpu_begin();
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7224,8 +7224,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	kvm_put_guest_xcr0(vcpu);
-
 	if (!vcpu->guest_fpu_loaded) {
 		vcpu->fpu_counter = 0;
 		return;

From 5716a93fef70b4d305e9b3afea50c3027d22cc3c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Apr 2016 09:37:22 +0100
Subject: [PATCH 451/797] KVM: arm/arm64: Handle forward time correction
 gracefully

commit 1c5631c73fc2261a5df64a72c155cb53dcdc0c45 upstream.

On a host that runs NTP, corrections can have a direct impact on
the background timer that we program on the behalf of a vcpu.

In particular, NTP performing a forward correction will result in
a timer expiring sooner than expected from a guest point of view.
Not a big deal, we kick the vcpu anyway.

But on wake-up, the vcpu thread is going to perform a check to
find out whether or not it should block. And at that point, the
timer check is going to say "timer has not expired yet, go back
to sleep". This results in the timer event being lost forever.

There are multiple ways to handle this. One would be to record that
the timer has expired and let kvm_cpu_has_pending_timer return
true in that case, but that would be fairly invasive. Another is
to check for the "short sleep" condition in the hrtimer callback,
and restart the timer for the remaining time when the condition
is detected.

This patch implements the latter, with a bit of refactoring in
order to avoid too much code duplication.

Reported-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/arm/arch_timer.c | 49 +++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index ea6064696fe4..a7b9022b5c8f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -86,6 +86,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
 	vcpu->arch.timer_cpu.armed = false;
 
+	WARN_ON(!kvm_timer_should_fire(vcpu));
+
 	/*
 	 * If the vcpu is blocked we want to wake it up so that it will see
 	 * the timer has expired when entering the guest.
@@ -93,10 +95,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	kvm_vcpu_kick(vcpu);
 }
 
+static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
+{
+	cycle_t cval, now;
+
+	cval = vcpu->arch.timer_cpu.cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	if (now < cval) {
+		u64 ns;
+
+		ns = cyclecounter_cyc2ns(timecounter->cc,
+					 cval - now,
+					 timecounter->mask,
+					 &timecounter->frac);
+		return ns;
+	}
+
+	return 0;
+}
+
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 {
 	struct arch_timer_cpu *timer;
+	struct kvm_vcpu *vcpu;
+	u64 ns;
+
 	timer = container_of(hrt, struct arch_timer_cpu, timer);
+	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
+
+	/*
+	 * Check that the timer has really expired from the guest's
+	 * PoV (NTP on the host may have forced it to expire
+	 * early). If we should have slept longer, restart it.
+	 */
+	ns = kvm_timer_compute_delta(vcpu);
+	if (unlikely(ns)) {
+		hrtimer_forward_now(hrt, ns_to_ktime(ns));
+		return HRTIMER_RESTART;
+	}
+
 	queue_work(wqueue, &timer->expired);
 	return HRTIMER_NORESTART;
 }
@@ -170,8 +208,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	u64 ns;
-	cycle_t cval, now;
 
 	BUG_ON(timer_is_armed(timer));
 
@@ -191,14 +227,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 		return;
 
 	/*  The timer has not yet expired, schedule a background timer */
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	ns = cyclecounter_cyc2ns(timecounter->cc,
-				 cval - now,
-				 timecounter->mask,
-				 &timecounter->frac);
-	timer_arm(timer, ns);
+	timer_arm(timer, kvm_timer_compute_delta(vcpu));
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)

From 4bb48b5f95a9e40451e259e295d03cd301740440 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Tue, 8 Mar 2016 12:24:35 +0530
Subject: [PATCH 452/797] ARM: dts: AM43x-epos: Fix clk parent for synctimer

commit cfe1580a6415bc37fd62d79eb8102a618f7650b2 upstream.

commit 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init
for AM437x") makes synctimer32k as the clocksource on AM43xx. By default
the synctimer32k is clocked by 32K RTC OSC on AM43xx. But this 32K RTC OSC
is not available on epos boards which makes it fail to boot.

Synctimer32k can also be clocked by a peripheral PLL, so make this the
clock parent for synctimer32k on epos boards.

Fixes: 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init for AM437x")
Reported-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/am43x-epos-evm.dts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 47954ed990f8..00707aac72fc 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -792,3 +792,8 @@ &mcasp1 {
 	tx-num-evt = <32>;
 	rx-num-evt = <32>;
 };
+
+&synctimer_32kclk {
+	assigned-clocks = <&mux_synctimer32k_ck>;
+	assigned-clock-parents = <&clkdiv32k_ick>;
+};

From c6cf3b71df047f61b568795fe926f107806eae82 Mon Sep 17 00:00:00 2001
From: Patrick Uiterwijk <patrick@puiterwijk.org>
Date: Tue, 29 Mar 2016 16:57:40 +0000
Subject: [PATCH 453/797] ARM: mvebu: Correct unit address for linksys

commit 199831c77c50e6913e893b6bc268ba9f4a9a2bf8 upstream.

The USB2 port for Armada 38x is defined to be at 58000, not at
50000.

Fixes: 2d0a7addbd10 ("ARM: Kirkwood: Add support for many Synology NAS devices")
Signed-off-by: Patrick Uiterwijk <patrick@puiterwijk.org>
Acked-by: Imre Kaloz <kaloz@openwrt.org>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/armada-385-linksys.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index 3710755c6d76..85d2c377c332 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -117,7 +117,7 @@ sata@a8000 {
 			};
 
 			/* USB part of the eSATA/USB 2.0 port */
-			usb@50000 {
+			usb@58000 {
 				status = "okay";
 			};
 

From 81b5ed00246258100df11c87f118a27f0ebceba3 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Fri, 11 Mar 2016 10:12:28 -0600
Subject: [PATCH 454/797] ARM: OMAP2: Fix up interconnect barrier
 initialization for DRA7

commit 456e8d53482537616899a146b706eccd095404e6 upstream.

The following commits:
commit 3fa609755c11 ("ARM: omap2: restore OMAP4 barrier behaviour")
commit f746929ffdc8 ("Revert "ARM: OMAP4: remove dead kconfig option OMAP4_ERRATA_I688"")
and
commit ea827ad5ffbb ("ARM: DRA7: Provide proper IO map table")
came in around the same time; unfortunately this seems to have missed
initializing the barrier for DRA7 platforms - omap5_map_io was reused
for dra7 till it was split out by the last patch. barrier_init
needs to be hence carried forward as it is valid for DRA7 family of
processors as they are for OMAP5.

Fixes: ea827ad5ffbb7 ("ARM: DRA7: Provide proper IO map table")
Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reported-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 3eaeaca5da05..3a911d8dea8b 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -368,6 +368,7 @@ void __init omap5_map_io(void)
 void __init dra7xx_map_io(void)
 {
 	iotable_init(dra7xx_io_desc, ARRAY_SIZE(dra7xx_io_desc));
+	omap_barriers_init();
 }
 #endif
 /*

From 882e790b572a7dadf5323e373d0139bfbc6dce15 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Sat, 26 Mar 2016 23:08:55 -0600
Subject: [PATCH 455/797] ARM: OMAP2+: hwmod: Fix updating of sysconfig
 register

commit 3ca4a238106dedc285193ee47f494a6584b6fd2f upstream.

Commit 127500ccb766f ("ARM: OMAP2+: Only write the sysconfig on idle
when necessary") talks about verification of sysconfig cache value before
updating it, only during idle path. But the patch is adding the
verification in the enable path. So, adding the check in a proper place
as per the commit description.

Not keeping this check during enable path as there is a chance of losing
context and it is safe to do on idle as the context of the register will
never be lost while the device is active.

Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Acked-by: Tero Kristo <t-kristo@ti.com>
Cc: Jon Hunter <jonathanh@nvidia.com>
Fixes: commit 127500ccb766 "ARM: OMAP2+: Only write the sysconfig on idle when necessary"
[paul@pwsan.com: appears to have been caused by my own mismerge of the
 originally posted patch]
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/omap_hwmod.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 8e0bd5939e5a..147c90e70b2e 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1416,9 +1416,7 @@ static void _enable_sysc(struct omap_hwmod *oh)
 	    (sf & SYSC_HAS_CLOCKACTIVITY))
 		_set_clockactivity(oh, oh->class->sysc->clockact, &v);
 
-	/* If the cached value is the same as the new value, skip the write */
-	if (oh->_sysc_cache != v)
-		_write_sysconfig(v, oh);
+	_write_sysconfig(v, oh);
 
 	/*
 	 * Set the autoidle bit only after setting the smartidle bit
@@ -1481,7 +1479,9 @@ static void _idle_sysc(struct omap_hwmod *oh)
 		_set_master_standbymode(oh, idlemode, &v);
 	}
 
-	_write_sysconfig(v, oh);
+	/* If the cached value is the same as the new value, skip the write */
+	if (oh->_sysc_cache != v)
+		_write_sysconfig(v, oh);
 }
 
 /**

From 6905c7a4aa1ef675825bc2ab56fd965a573ffb74 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 6 Apr 2016 14:06:48 +0100
Subject: [PATCH 456/797] assoc_array: don't call compare_object() on a node

commit 8d4a2ec1e0b41b0cf9a0c5cd4511da7f8e4f3de2 upstream.

Changes since V1: fixed the description and added KASan warning.

In assoc_array_insert_into_terminal_node(), we call the
compare_object() method on all non-empty slots, even when they're
not leaves, passing a pointer to an unexpected structure to
compare_object(). Currently it causes an out-of-bound read access
in keyring_compare_object detected by KASan (see below). The issue
is easily reproduced with keyutils testsuite.
Only call compare_object() when the slot is a leaf.

KASan warning:
==================================================================
BUG: KASAN: slab-out-of-bounds in keyring_compare_object+0x213/0x240 at addr ffff880060a6f838
Read of size 8 by task keyctl/1655
=============================================================================
BUG kmalloc-192 (Not tainted): kasan: bad access detected
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in assoc_array_insert+0xfd0/0x3a60 age=69 cpu=1 pid=1647
	___slab_alloc+0x563/0x5c0
	__slab_alloc+0x51/0x90
	kmem_cache_alloc_trace+0x263/0x300
	assoc_array_insert+0xfd0/0x3a60
	__key_link_begin+0xfc/0x270
	key_create_or_update+0x459/0xaf0
	SyS_add_key+0x1ba/0x350
	entry_SYSCALL_64_fastpath+0x12/0x76
INFO: Slab 0xffffea0001829b80 objects=16 used=8 fp=0xffff880060a6f550 flags=0x3fff8000004080
INFO: Object 0xffff880060a6f740 @offset=5952 fp=0xffff880060a6e5d1

Bytes b4 ffff880060a6f730: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f740: d1 e5 a6 60 00 88 ff ff 0e 00 00 00 00 00 00 00  ...`............
Object ffff880060a6f750: 02 cf 8e 60 00 88 ff ff 02 c0 8e 60 00 88 ff ff  ...`.......`....
Object ffff880060a6f760: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f770: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f790: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7d0: 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
CPU: 0 PID: 1655 Comm: keyctl Tainted: G    B           4.5.0-rc4-kasan+ #291
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0000000000000000 000000001b2800b4 ffff880060a179e0 ffffffff81b60491
 ffff88006c802900 ffff880060a6f740 ffff880060a17a10 ffffffff815e2969
 ffff88006c802900 ffffea0001829b80 ffff880060a6f740 ffff880060a6e650
Call Trace:
 [<ffffffff81b60491>] dump_stack+0x85/0xc4
 [<ffffffff815e2969>] print_trailer+0xf9/0x150
 [<ffffffff815e9454>] object_err+0x34/0x40
 [<ffffffff815ebe50>] kasan_report_error+0x230/0x550
 [<ffffffff819949be>] ? keyring_get_key_chunk+0x13e/0x210
 [<ffffffff815ec62d>] __asan_report_load_n_noabort+0x5d/0x70
 [<ffffffff81994cc3>] ? keyring_compare_object+0x213/0x240
 [<ffffffff81994cc3>] keyring_compare_object+0x213/0x240
 [<ffffffff81bc238c>] assoc_array_insert+0x86c/0x3a60
 [<ffffffff81bc1b20>] ? assoc_array_cancel_edit+0x70/0x70
 [<ffffffff8199797d>] ? __key_link_begin+0x20d/0x270
 [<ffffffff8199786c>] __key_link_begin+0xfc/0x270
 [<ffffffff81993389>] key_create_or_update+0x459/0xaf0
 [<ffffffff8128ce0d>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff81992f30>] ? key_type_lookup+0xc0/0xc0
 [<ffffffff8199e19d>] ? lookup_user_key+0x13d/0xcd0
 [<ffffffff81534763>] ? memdup_user+0x53/0x80
 [<ffffffff819983ea>] SyS_add_key+0x1ba/0x350
 [<ffffffff81998230>] ? key_get_type_from_user.constprop.6+0xa0/0xa0
 [<ffffffff828bcf4e>] ? retint_user+0x18/0x23
 [<ffffffff8128cc7e>] ? trace_hardirqs_on_caller+0x3fe/0x580
 [<ffffffff81004017>] ? trace_hardirqs_on_thunk+0x17/0x19
 [<ffffffff828bc432>] entry_SYSCALL_64_fastpath+0x12/0x76
Memory state around the buggy address:
 ffff880060a6f700: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
 ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc
>ffff880060a6f800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                        ^
 ffff880060a6f880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff880060a6f900: fc fc fc fc fc fc 00 00 00 00 00 00 00 00 00 00
==================================================================

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/assoc_array.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 03dd576e6773..59fd7c0b119c 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -524,7 +524,9 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
 			free_slot = i;
 			continue;
 		}
-		if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+		if (assoc_array_ptr_is_leaf(ptr) &&
+		    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+					index_key)) {
 			pr_devel("replace in slot %d\n", i);
 			edit->leaf_p = &node->slots[i];
 			edit->dead_leaf = node->slots[i];

From 6a20c0a043a73e39b5cd952d7eaf7fd7831e73ac Mon Sep 17 00:00:00 2001
From: Rafal Redzimski <rafal.f.redzimski@intel.com>
Date: Fri, 8 Apr 2016 16:25:05 +0300
Subject: [PATCH 457/797] usb: xhci: applying XHCI_PME_STUCK_QUIRK to Intel BXT
 B0 host

commit 0d46faca6f887a849efb07c1655b5a9f7c288b45 upstream.

Broxton B0 also requires XHCI_PME_STUCK_QUIRK.
Adding PCI device ID for Broxton B and adding to quirk.

Signed-off-by: Rafal Redzimski <rafal.f.redzimski@intel.com>
Signed-off-by: Robert Dobrowolski <robert.dobrowolski@linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index c2d65206ec6c..6a120a71ca3d 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -48,6 +48,7 @@
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI		0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI	0x9d2f
 #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI		0x0aa8
+#define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI		0x1aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -156,7 +157,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		(pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
-		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) {
+		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
+		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) {
 		xhci->quirks |= XHCI_PME_STUCK_QUIRK;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&

From ba7aa9a970dc12054252042e2b30e1dedcdc5968 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 8 Apr 2016 16:25:06 +0300
Subject: [PATCH 458/797] xhci: resume USB 3 roothub first

commit 671ffdff5b13314b1fc65d62cf7604b873fb5dc4 upstream.

Give USB3 devices a better chance to enumerate at USB 3 speeds if
they are connected to a suspended host.
Solves an issue with NEC uPD720200 host hanging when partially
enumerating a USB3 device as USB2 after host controller runtime resume.

Tested-by: Mike Murdoch <main.haarp@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 776d59c32bc5..0be5beaf3f85 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1103,8 +1103,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 		/* Resume root hubs only when have pending events. */
 		status = readl(&xhci->op_regs->status);
 		if (status & STS_EINT) {
-			usb_hcd_resume_root_hub(hcd);
 			usb_hcd_resume_root_hub(xhci->shared_hcd);
+			usb_hcd_resume_root_hub(hcd);
 		}
 	}
 
@@ -1119,10 +1119,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 
 	/* Re-enable port polling. */
 	xhci_dbg(xhci, "%s: starting port polling.\n", __func__);
-	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
-	usb_hcd_poll_rh_status(hcd);
 	set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
 	usb_hcd_poll_rh_status(xhci->shared_hcd);
+	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+	usb_hcd_poll_rh_status(hcd);
 
 	return retval;
 }

From bb6adb50beb03da007c63e86866f6be81d671075 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 8 Apr 2016 16:25:09 +0300
Subject: [PATCH 459/797] usb: xhci: fix wild pointers in xhci_mem_cleanup

commit 71504062a7c34838c3fccd92c447f399d3cb5797 upstream.

This patch fixes some wild pointers produced by xhci_mem_cleanup.
These wild pointers will cause system crash if xhci_mem_cleanup()
is called twice.

Reported-and-tested-by: Pengcheng Li <lpc.li@hisilicon.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index c48cbe731356..d8dbd7e5194b 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1875,6 +1875,12 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
 	kfree(xhci->rh_bw);
 	kfree(xhci->ext_caps);
 
+	xhci->usb2_ports = NULL;
+	xhci->usb3_ports = NULL;
+	xhci->port_array = NULL;
+	xhci->rh_bw = NULL;
+	xhci->ext_caps = NULL;
+
 	xhci->page_size = 0;
 	xhci->page_shift = 0;
 	xhci->bus_state[0].bus_suspended = 0;

From 0eb1e16bf9feb36441440b0bd9fb0ced0fcdfdb6 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 8 Apr 2016 16:25:10 +0300
Subject: [PATCH 460/797] xhci: fix 10 second timeout on removal of PCI
 hotpluggable xhci controllers

commit 98d74f9ceaefc2b6c4a6440050163a83be0abede upstream.

PCI hotpluggable xhci controllers such as some Alpine Ridge solutions will
remove the xhci controller from the PCI bus when the last USB device is
disconnected.

Add a flag to indicate that the host is being removed to avoid queueing
configure_endpoint commands for the dropped endpoints.
For PCI hotplugged controllers this will prevent 5 second command timeouts.
For static xhci controllers the configure_endpoint command is not needed
in the removal case as everything will be returned, freed, and the
controller is reset.

For now the flag is only set for PCI connected host controllers.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c  | 1 +
 drivers/usb/host/xhci-ring.c | 3 ++-
 drivers/usb/host/xhci.c      | 8 +++++---
 drivers/usb/host/xhci.h      | 1 +
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 6a120a71ca3d..ea4fb4b0cd44 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -301,6 +301,7 @@ static void xhci_pci_remove(struct pci_dev *dev)
 	struct xhci_hcd *xhci;
 
 	xhci = hcd_to_xhci(pci_get_drvdata(dev));
+	xhci->xhc_state |= XHCI_STATE_REMOVING;
 	if (xhci->shared_hcd) {
 		usb_remove_hcd(xhci->shared_hcd);
 		usb_put_hcd(xhci->shared_hcd);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index db0f0831b94f..2b63969c2bbf 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -4008,7 +4008,8 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd,
 	int reserved_trbs = xhci->cmd_ring_reserved_trbs;
 	int ret;
 
-	if (xhci->xhc_state) {
+	if ((xhci->xhc_state & XHCI_STATE_DYING) ||
+		(xhci->xhc_state & XHCI_STATE_HALTED)) {
 		xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n");
 		return -ESHUTDOWN;
 	}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0be5beaf3f85..ec9e758d5fcd 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -146,7 +146,8 @@ static int xhci_start(struct xhci_hcd *xhci)
 				"waited %u microseconds.\n",
 				XHCI_MAX_HALT_USEC);
 	if (!ret)
-		xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING);
+		/* clear state flags. Including dying, halted or removing */
+		xhci->xhc_state = 0;
 
 	return ret;
 }
@@ -2753,7 +2754,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	if (ret <= 0)
 		return ret;
 	xhci = hcd_to_xhci(hcd);
-	if (xhci->xhc_state & XHCI_STATE_DYING)
+	if ((xhci->xhc_state & XHCI_STATE_DYING) ||
+		(xhci->xhc_state & XHCI_STATE_REMOVING))
 		return -ENODEV;
 
 	xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
@@ -3800,7 +3802,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 
 	mutex_lock(&xhci->mutex);
 
-	if (xhci->xhc_state)	/* dying or halted */
+	if (xhci->xhc_state)	/* dying, removing or halted */
 		goto out;
 
 	if (!udev->slot_id) {
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 0b9451250e33..99ac2289dbf3 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1596,6 +1596,7 @@ struct xhci_hcd {
  */
 #define XHCI_STATE_DYING	(1 << 0)
 #define XHCI_STATE_HALTED	(1 << 1)
+#define XHCI_STATE_REMOVING	(1 << 2)
 	/* Statistics */
 	int			error_bitmask;
 	unsigned int		quirks;

From 95b9219e05dafdb76b0707e815e5314cc0cf91af Mon Sep 17 00:00:00 2001
From: Robert Dobrowolski <robert.dobrowolski@linux.intel.com>
Date: Thu, 24 Mar 2016 03:30:07 -0700
Subject: [PATCH 461/797] usb: hcd: out of bounds access in for_each_companion

commit e86103a75705c7c530768f4ffaba74cf382910f2 upstream.

On BXT platform Host Controller and Device Controller figure as
same PCI device but with different device function. HCD should
not pass data to Device Controller but only to Host Controllers.
Checking if companion device is Host Controller, otherwise skip.

Signed-off-by: Robert Dobrowolski <robert.dobrowolski@linux.intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd-pci.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 9eb1cff28bd4..b8b580e5ae6e 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -74,6 +74,15 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd,
 		if (companion->bus != pdev->bus ||
 				PCI_SLOT(companion->devfn) != slot)
 			continue;
+
+		/*
+		 * Companion device should be either UHCI,OHCI or EHCI host
+		 * controller, otherwise skip.
+		 */
+		if (companion->class != CL_UHCI && companion->class != CL_OHCI &&
+				companion->class != CL_EHCI)
+			continue;
+
 		companion_hcd = pci_get_drvdata(companion);
 		if (!companion_hcd || !companion_hcd->self.root_hub)
 			continue;

From e4c7ab76586146820b394e0176f286f5a2e70cb3 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 14 Apr 2016 17:01:17 +0200
Subject: [PATCH 462/797] usb: gadget: f_fs: Fix use-after-free

commit 38740a5b87d53ceb89eb2c970150f6e94e00373a upstream.

When using asynchronous read or write operations on the USB endpoints the
issuer of the IO request is notified by calling the ki_complete() callback
of the submitted kiocb when the URB has been completed.

Calling this ki_complete() callback will free kiocb. Make sure that the
structure is no longer accessed beyond that point, otherwise undefined
behaviour might occur.

Fixes: 2e4c7553cd6f ("usb: gadget: f_fs: add aio support")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index cf43e9e18368..79d895c2dd71 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -646,6 +646,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
 						   work);
 	int ret = io_data->req->status ? io_data->req->status :
 					 io_data->req->actual;
+	bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD;
 
 	if (io_data->read && ret > 0) {
 		use_mm(io_data->mm);
@@ -657,13 +658,11 @@ static void ffs_user_copy_worker(struct work_struct *work)
 
 	io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
 
-	if (io_data->ffs->ffs_eventfd &&
-	    !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
+	if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd)
 		eventfd_signal(io_data->ffs->ffs_eventfd, 1);
 
 	usb_ep_free_request(io_data->ep, io_data->req);
 
-	io_data->kiocb->private = NULL;
 	if (io_data->read)
 		kfree(io_data->to_free);
 	kfree(io_data->buf);

From 9d58f322ee18ffaca1e0b67d90ab811ad75e62a6 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 12 Apr 2016 12:14:46 -0400
Subject: [PATCH 463/797] dm cache metadata: fix READ_LOCK macros and cleanup
 WRITE_LOCK macros

commit 9567366fefddeaea4ed1d713270535d93a3b3c76 upstream.

The READ_LOCK macro was incorrectly returning -EINVAL if
dm_bm_is_read_only() was true -- it will always be true once the cache
metadata transitions to read-only by dm_cache_metadata_set_read_only().

Wrap READ_LOCK and WRITE_LOCK multi-statement macros in do {} while(0).
Also, all accesses of the 'cmd' argument passed to these related macros
are now encapsulated in parentheses.

A follow-up patch can be developed to eliminate the use of macros in
favor of pure C code.  Avoiding that now given that this needs to apply
to stable@.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Fixes: d14fcf3dd79 ("dm cache: make sure every metadata function checks fail_io")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-cache-metadata.c | 64 +++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 27f2ef300f8b..65ce6985f87a 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -867,39 +867,55 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
 	return 0;
 }
 
-#define WRITE_LOCK(cmd)	\
-	down_write(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_write(&cmd->root_lock); \
-		return -EINVAL; \
+static bool cmd_write_lock(struct dm_cache_metadata *cmd)
+{
+	down_write(&cmd->root_lock);
+	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
+		up_write(&cmd->root_lock);
+		return false;
 	}
+	return true;
+}
 
-#define WRITE_LOCK_VOID(cmd) \
-	down_write(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_write(&cmd->root_lock); \
-		return; \
-	}
+#define WRITE_LOCK(cmd)				\
+	do {					\
+		if (!cmd_write_lock((cmd)))	\
+			return -EINVAL;		\
+	} while(0)
+
+#define WRITE_LOCK_VOID(cmd)			\
+	do {					\
+		if (!cmd_write_lock((cmd)))	\
+			return;			\
+	} while(0)
 
 #define WRITE_UNLOCK(cmd) \
-	up_write(&cmd->root_lock)
+	up_write(&(cmd)->root_lock)
 
-#define READ_LOCK(cmd) \
-	down_read(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_read(&cmd->root_lock); \
-		return -EINVAL; \
+static bool cmd_read_lock(struct dm_cache_metadata *cmd)
+{
+	down_write(&cmd->root_lock);
+	if (cmd->fail_io) {
+		up_write(&cmd->root_lock);
+		return false;
 	}
+	return true;
+}
 
-#define READ_LOCK_VOID(cmd)	\
-	down_read(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_read(&cmd->root_lock); \
-		return; \
-	}
+#define READ_LOCK(cmd)				\
+	do {					\
+		if (!cmd_read_lock((cmd)))	\
+			return -EINVAL;		\
+	} while(0)
+
+#define READ_LOCK_VOID(cmd)			\
+	do {					\
+		if (!cmd_read_lock((cmd)))	\
+			return;			\
+	} while(0)
 
 #define READ_UNLOCK(cmd) \
-	up_read(&cmd->root_lock)
+	up_read(&(cmd)->root_lock)
 
 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
 {

From be5cbaf31cd318f8aaeeff901f6d27232dfa965f Mon Sep 17 00:00:00 2001
From: Ahmed Samy <f.fallen45@gmail.com>
Date: Sun, 17 Apr 2016 05:37:09 +0000
Subject: [PATCH 464/797] dm cache metadata: fix cmd_read_lock() acquiring
 write lock

commit 6545b60baaf880b0cd29a5e89dbe745a06027e89 upstream.

Commit 9567366fefdd ("dm cache metadata: fix READ_LOCK macros and
cleanup WRITE_LOCK macros") uses down_write() instead of down_read() in
cmd_read_lock(), yet up_read() is used to release the lock in
READ_UNLOCK().  Fix it.

Fixes: 9567366fefdd ("dm cache metadata: fix READ_LOCK macros and cleanup WRITE_LOCK macros")
Signed-off-by: Ahmed Samy <f.fallen45@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-cache-metadata.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 65ce6985f87a..3970cda10080 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -894,9 +894,9 @@ static bool cmd_write_lock(struct dm_cache_metadata *cmd)
 
 static bool cmd_read_lock(struct dm_cache_metadata *cmd)
 {
-	down_write(&cmd->root_lock);
+	down_read(&cmd->root_lock);
 	if (cmd->fail_io) {
-		up_write(&cmd->root_lock);
+		up_read(&cmd->root_lock);
 		return false;
 	}
 	return true;

From ed643d220692bfc2bfec9fe618d102f13a5dae9f Mon Sep 17 00:00:00 2001
From: Rui Salvaterra <rsalvaterra@gmail.com>
Date: Sat, 9 Apr 2016 22:05:34 +0100
Subject: [PATCH 465/797] lib: lz4: fixed zram with lz4 on big endian machines

commit 3e26a691fe3fe1e02a76e5bab0c143ace4b137b4 upstream.

Based on Sergey's test patch [1], this fixes zram with lz4 compression
on big endian cpus.

Note that the 64-bit preprocessor test is not a cleanup, it's part of
the fix, since those identifiers are bogus (for example, __ppc64__
isn't defined anywhere else in the kernel, which means we'd fall into
the 32-bit definitions on ppc64).

Tested on ppc64 with no regression on x86_64.

[1] http://marc.info/?l=linux-kernel&m=145994470805853&w=4

Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/lz4/lz4defs.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index abcecdc2d0f2..0710a62ad2f6 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -11,8 +11,7 @@
 /*
  * Detects 64 bits mode
  */
-#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \
-	|| defined(__ppc64__) || defined(__LP64__))
+#if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
@@ -35,6 +34,10 @@ typedef struct _U64_S { u64 v; } U64_S;
 
 #define PUT4(s, d) (A32(d) = A32(s))
 #define PUT8(s, d) (A64(d) = A64(s))
+
+#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
+	(d = s - A16(p))
+
 #define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
 	do {	\
 		A16(p) = v; \
@@ -51,10 +54,13 @@ typedef struct _U64_S { u64 v; } U64_S;
 #define PUT8(s, d) \
 	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
 
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		put_unaligned(v, (u16 *)(p)); \
-		p += 2; \
+#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
+	(d = s - get_unaligned_le16(p))
+
+#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
+	do {						\
+		put_unaligned_le16(v, (u16 *)(p));	\
+		p += 2;					\
 	} while (0)
 #endif
 
@@ -140,9 +146,6 @@ typedef struct _U64_S { u64 v; } U64_S;
 
 #endif
 
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
-	(d = s - get_unaligned_le16(p))
-
 #define LZ4_WILDCOPY(s, d, e)		\
 	do {				\
 		LZ4_COPYPACKET(s, d);	\

From 9b3bd581a0492bdfe788539ca65a14da570faad1 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 9 Mar 2016 09:18:07 -0600
Subject: [PATCH 466/797] debugfs: Make automount point inodes permanently
 empty

commit 87243deb88671f70def4c52dfa7ca7830707bd31 upstream.

Starting with 4.1 the tracing subsystem has its own filesystem
which is automounted in the tracing subdirectory of debugfs.
Prior to this debugfs could be bind mounted in a cloned mount
namespace, but if tracefs has been mounted under debugfs this
now fails because there is a locked child mount. This creates
a regression for container software which bind mounts debugfs
to satisfy the assumption of some userspace software.

In other pseudo filesystems such as proc and sysfs we're already
creating mountpoints like this in such a way that no dirents can
be created in the directories, allowing them to be exceptions to
some MNT_LOCKED tests. In fact we already do this for the
tracefs mountpoint in sysfs.

Do the same in debugfs_create_automount(), since the intention
here is clearly to create a mountpoint. This fixes the regression,
as locked child mounts on permanently empty directories do not
cause a bind mount to fail.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7fcc0de0b2f..0f5d05bf2131 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -457,7 +457,7 @@ struct dentry *debugfs_create_automount(const char *name,
 	if (unlikely(!inode))
 		return failed_creating(dentry);
 
-	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	make_empty_dir_inode(inode);
 	inode->i_flags |= S_AUTOMOUNT;
 	inode->i_private = data;
 	dentry->d_fsdata = (void *)f;

From 42e6f01a44fe4aab28819b5efa48fbe9da3059e5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 8 Apr 2016 16:22:17 +0300
Subject: [PATCH 467/797] dmaengine: dw: fix master selection

commit 3fe6409c23e2bee4b2b1b6d671d2da8daa15271c upstream.

The commit 895005202987 ("dmaengine: dw: apply both HS interfaces and remove
slave_id usage") cleaned up the code to avoid usage of deprecated slave_id
member of generic slave configuration.

Meanwhile it broke the master selection by removing important call to
dwc_set_masters() in ->device_alloc_chan_resources() which copied masters from
custom slave configuration to the internal channel structure.

Everything works until now since there is no customized connection of
DesignWare DMA IP to the bus, i.e. one bus and one or more masters are in use.
The configurations where 2 masters are connected to the different masters are
not working anymore. We are expecting one user of such configuration and need
to select masters properly. Besides that it is obviously a performance
regression since only one master is in use in multi-master configuration.

Select masters in accordance with what user asked for. Keep this patch in a form
more suitable for back porting.

We are safe to take necessary data in ->device_alloc_chan_resources() because
we don't support generic slave configuration embedded into custom one, and thus
the only way to provide such is to use the parameter to a filter function which
is called exactly before channel resource allocation.

While here, replace BUG_ON with the less noisy dev_warn() and prevent channel
allocation in case of error.

Fixes: 895005202987 ("dmaengine: dw: apply both HS interfaces and remove slave_id usage")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/dw/core.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 4f099ea29f83..c66133b5e852 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -130,26 +130,14 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 static void dwc_initialize(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-	struct dw_dma_slave *dws = dwc->chan.private;
 	u32 cfghi = DWC_CFGH_FIFO_MODE;
 	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
 
 	if (dwc->initialized == true)
 		return;
 
-	if (dws) {
-		/*
-		 * We need controller-specific data to set up slave
-		 * transfers.
-		 */
-		BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
-
-		cfghi |= DWC_CFGH_DST_PER(dws->dst_id);
-		cfghi |= DWC_CFGH_SRC_PER(dws->src_id);
-	} else {
-		cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
-		cfghi |= DWC_CFGH_SRC_PER(dwc->src_id);
-	}
+	cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
+	cfghi |= DWC_CFGH_SRC_PER(dwc->src_id);
 
 	channel_writel(dwc, CFG_LO, cfglo);
 	channel_writel(dwc, CFG_HI, cfghi);
@@ -936,7 +924,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param)
 	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
 	struct dw_dma_slave *dws = param;
 
-	if (!dws || dws->dma_dev != chan->device->dev)
+	if (dws->dma_dev != chan->device->dev)
 		return false;
 
 	/* We have to copy data since dws can be temporary storage */
@@ -1160,6 +1148,14 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 	 * doesn't mean what you think it means), and status writeback.
 	 */
 
+	/*
+	 * We need controller-specific data to set up slave transfers.
+	 */
+	if (chan->private && !dw_dma_filter(chan, chan->private)) {
+		dev_warn(chan2dev(chan), "Wrong controller-specific data\n");
+		return -EINVAL;
+	}
+
 	/* Enable controller here if needed */
 	if (!dw->in_use)
 		dw_dma_on(dw);
@@ -1221,6 +1217,14 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	spin_lock_irqsave(&dwc->lock, flags);
 	list_splice_init(&dwc->free_list, &list);
 	dwc->descs_allocated = 0;
+
+	/* Clear custom channel configuration */
+	dwc->src_id = 0;
+	dwc->dst_id = 0;
+
+	dwc->src_master = 0;
+	dwc->dst_master = 0;
+
 	dwc->initialized = false;
 
 	/* Disable interrupts */

From 34c1b030296c6815c05e416c7a647b68e695004a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Mar 2016 14:26:32 +0200
Subject: [PATCH 468/797] dmaengine: hsu: correct use of channel status
 register

commit 4f4bc0abff79dc9d7ccbd3143adbf8ad1f4fe6ab upstream.

There is a typo in the documentation regarding the descriptor empty bit (DESCE)
which is set to 1 when descriptor is empty. Thus, status register at the end of
a transfer usually returns all DESCE bits set and thus it will never be zero.

Moreover, there are 2 bits (CDESC) that encode current descriptor, on which
interrupt has been asserted. In case when we have few descriptors programmed we
might have non-zero value.

Remove DESCE and CDESC bits from DMA channel status register (HSU_CH_SR) when
reading it.

Fixes: 2b49e0c56741 ("dmaengine: append hsu DMA driver")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/hsu/hsu.c | 2 +-
 drivers/dma/hsu/hsu.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 823ad728aecf..efc02b98e6ba 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -135,7 +135,7 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc)
 	sr = hsu_chan_readl(hsuc, HSU_CH_SR);
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
 
-	return sr;
+	return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY);
 }
 
 irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr)
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index f06579c6d548..26da2865b025 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -41,6 +41,9 @@
 #define HSU_CH_SR_DESCTO(x)	BIT(8 + (x))
 #define HSU_CH_SR_DESCTO_ANY	(BIT(11) | BIT(10) | BIT(9) | BIT(8))
 #define HSU_CH_SR_CHE		BIT(15)
+#define HSU_CH_SR_DESCE(x)	BIT(16 + (x))
+#define HSU_CH_SR_DESCE_ANY	(BIT(19) | BIT(18) | BIT(17) | BIT(16))
+#define HSU_CH_SR_CDESC_ANY	(BIT(31) | BIT(30))
 
 /* Bits in HSU_CH_CR */
 #define HSU_CH_CR_CHA		BIT(0)

From 94d75e190f199dfce1094496927418cb00810683 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Mon, 15 Feb 2016 21:57:48 +0100
Subject: [PATCH 469/797] dmaengine: pxa_dma: fix the maximum requestor line

commit 6bab1c6afdca0371cfa957079b36b78d12dd2cf5 upstream.

The current number of requestor lines is limited to 31. This was an
error of a previous commit, as this number is platform dependent, and is
actually :
 - for pxa25x: 40 requestor lines
 - for pxa27x: 75 requestor lines
 - for pxa3xx: 100 requestor lines

The previous testing did not reveal the faulty constant as on pxa[23]xx
platforms, only camera, MSL and USB are above requestor 32, and in these
only the camera has a driver using dma.

Fixes: e87ffbdf0697 ("dmaengine: pxa_dma: fix the no-requestor case")
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/pxa_dma.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index a59061e4221a..55f5d33f6dc7 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -122,6 +122,7 @@ struct pxad_chan {
 struct pxad_device {
 	struct dma_device		slave;
 	int				nr_chans;
+	int				nr_requestors;
 	void __iomem			*base;
 	struct pxad_phy			*phys;
 	spinlock_t			phy_lock;	/* Phy association */
@@ -473,7 +474,7 @@ static void pxad_free_phy(struct pxad_chan *chan)
 		return;
 
 	/* clear the channel mapping in DRCMR */
-	if (chan->drcmr <= DRCMR_CHLNUM) {
+	if (chan->drcmr <= pdev->nr_requestors) {
 		reg = pxad_drcmr(chan->drcmr);
 		writel_relaxed(0, chan->phy->base + reg);
 	}
@@ -509,6 +510,7 @@ static bool is_running_chan_misaligned(struct pxad_chan *chan)
 
 static void phy_enable(struct pxad_phy *phy, bool misaligned)
 {
+	struct pxad_device *pdev;
 	u32 reg, dalgn;
 
 	if (!phy->vchan)
@@ -518,7 +520,8 @@ static void phy_enable(struct pxad_phy *phy, bool misaligned)
 		"%s(); phy=%p(%d) misaligned=%d\n", __func__,
 		phy, phy->idx, misaligned);
 
-	if (phy->vchan->drcmr <= DRCMR_CHLNUM) {
+	pdev = to_pxad_dev(phy->vchan->vc.chan.device);
+	if (phy->vchan->drcmr <= pdev->nr_requestors) {
 		reg = pxad_drcmr(phy->vchan->drcmr);
 		writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg);
 	}
@@ -914,6 +917,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 {
 	u32 maxburst = 0, dev_addr = 0;
 	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
 
 	*dcmd = 0;
 	if (dir == DMA_DEV_TO_MEM) {
@@ -922,7 +926,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 		dev_addr = chan->cfg.src_addr;
 		*dev_src = dev_addr;
 		*dcmd |= PXA_DCMD_INCTRGADDR;
-		if (chan->drcmr <= DRCMR_CHLNUM)
+		if (chan->drcmr <= pdev->nr_requestors)
 			*dcmd |= PXA_DCMD_FLOWSRC;
 	}
 	if (dir == DMA_MEM_TO_DEV) {
@@ -931,7 +935,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 		dev_addr = chan->cfg.dst_addr;
 		*dev_dst = dev_addr;
 		*dcmd |= PXA_DCMD_INCSRCADDR;
-		if (chan->drcmr <= DRCMR_CHLNUM)
+		if (chan->drcmr <= pdev->nr_requestors)
 			*dcmd |= PXA_DCMD_FLOWTRG;
 	}
 	if (dir == DMA_MEM_TO_MEM)
@@ -1341,13 +1345,15 @@ static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec,
 
 static int pxad_init_dmadev(struct platform_device *op,
 			    struct pxad_device *pdev,
-			    unsigned int nr_phy_chans)
+			    unsigned int nr_phy_chans,
+			    unsigned int nr_requestors)
 {
 	int ret;
 	unsigned int i;
 	struct pxad_chan *c;
 
 	pdev->nr_chans = nr_phy_chans;
+	pdev->nr_requestors = nr_requestors;
 	INIT_LIST_HEAD(&pdev->slave.channels);
 	pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources;
 	pdev->slave.device_free_chan_resources = pxad_free_chan_resources;
@@ -1382,7 +1388,7 @@ static int pxad_probe(struct platform_device *op)
 	const struct of_device_id *of_id;
 	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
 	struct resource *iores;
-	int ret, dma_channels = 0;
+	int ret, dma_channels = 0, nb_requestors = 0;
 	const enum dma_slave_buswidth widths =
 		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
 		DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1399,13 +1405,23 @@ static int pxad_probe(struct platform_device *op)
 		return PTR_ERR(pdev->base);
 
 	of_id = of_match_device(pxad_dt_ids, &op->dev);
-	if (of_id)
+	if (of_id) {
 		of_property_read_u32(op->dev.of_node, "#dma-channels",
 				     &dma_channels);
-	else if (pdata && pdata->dma_channels)
+		ret = of_property_read_u32(op->dev.of_node, "#dma-requests",
+					   &nb_requestors);
+		if (ret) {
+			dev_warn(pdev->slave.dev,
+				 "#dma-requests set to default 32 as missing in OF: %d",
+				 ret);
+			nb_requestors = 32;
+		};
+	} else if (pdata && pdata->dma_channels) {
 		dma_channels = pdata->dma_channels;
-	else
+		nb_requestors = pdata->nb_requestors;
+	} else {
 		dma_channels = 32;	/* default 32 channel */
+	}
 
 	dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask);
 	dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask);
@@ -1422,7 +1438,7 @@ static int pxad_probe(struct platform_device *op)
 	pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
 
 	pdev->slave.dev = &op->dev;
-	ret = pxad_init_dmadev(op, pdev, dma_channels);
+	ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors);
 	if (ret) {
 		dev_err(pdev->slave.dev, "unable to register\n");
 		return ret;
@@ -1441,7 +1457,8 @@ static int pxad_probe(struct platform_device *op)
 
 	platform_set_drvdata(op, pdev);
 	pxad_init_debugfs(pdev);
-	dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels);
+	dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n",
+		 dma_channels, nb_requestors);
 	return 0;
 }
 

From c0944355a74bc9c2b5b3cc5b627efe0c73e30bd9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Mar 2016 16:22:45 +0100
Subject: [PATCH 470/797] sched/cgroup: Fix/cleanup cgroup teardown/init

commit 2f5177f0fd7e531b26d54633be62d1d4cb94621c upstream.

The CPU controller hasn't kept up with the various changes in the whole
cgroup initialization / destruction sequence, and commit:

  2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")

caused it to explode.

The reason for this is that zombies do not inhibit css_offline() from
being called, but do stall css_released(). Now we tear down the cfs_rq
structures on css_offline() but zombies can run after that, leading to
use-after-free issues.

The solution is to move the tear-down to css_released(), which
guarantees nobody (including no zombies) is still using our cgroup.

Furthermore, a few simple cleanups are possible too. There doesn't
appear to be any point to us using css_online() (anymore?) so fold that
in css_alloc().

And since cgroup code guarantees an RCU grace period between
css_released() and css_free() we can forgo using call_rcu() and free the
stuff immediately.

Suggested-by: Tejun Heo <tj@kernel.org>
Reported-by: Kazuki Yamaguchi <k@rhe.jp>
Reported-by: Niklas Cassel <niklas.cassel@axis.com>
Tested-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")
Link: http://lkml.kernel.org/r/20160316152245.GY6344@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/core.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 70e5e09341f1..55bebf924946 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7693,7 +7693,7 @@ void set_curr_task(int cpu, struct task_struct *p)
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
-static void free_sched_group(struct task_group *tg)
+static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
@@ -7719,7 +7719,7 @@ struct task_group *sched_create_group(struct task_group *parent)
 	return tg;
 
 err:
-	free_sched_group(tg);
+	sched_free_group(tg);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -7739,17 +7739,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void free_sched_group_rcu(struct rcu_head *rhp)
+static void sched_free_group_rcu(struct rcu_head *rhp)
 {
 	/* now it should be safe to free those cfs_rqs */
-	free_sched_group(container_of(rhp, struct task_group, rcu));
+	sched_free_group(container_of(rhp, struct task_group, rcu));
 }
 
-/* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
 	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
+	call_rcu(&tg->rcu, sched_free_group_rcu);
 }
 
 void sched_offline_group(struct task_group *tg)
@@ -8210,31 +8209,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);
 
+	sched_online_group(tg, parent);
+
 	return &tg->css;
 }
 
-static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css->parent);
 
-	if (parent)
-		sched_online_group(tg, parent);
-	return 0;
+	sched_offline_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_destroy_group(tg);
-}
-
-static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
-{
-	struct task_group *tg = css_tg(css);
-
-	sched_offline_group(tg);
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
 }
 
 static void cpu_cgroup_fork(struct task_struct *task, void *private)
@@ -8594,9 +8588,8 @@ static struct cftype cpu_files[] = {
 
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
-	.css_online	= cpu_cgroup_css_online,
-	.css_offline	= cpu_cgroup_css_offline,
 	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,

From bdb0618ad1b9ea6ec6926450c687d133ccddf28c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 7 Jan 2016 16:07:20 +0000
Subject: [PATCH 471/797] arm64: Honour !PTE_WRITE in set_pte_at() for kernel
 mappings

commit ac15bd63bbb24238f763ec5b24ee175ec301e8cd upstream.

Currently, set_pte_at() only checks the software PTE_WRITE bit for user
mappings when it sets or clears the hardware PTE_RDONLY accordingly. The
kernel ptes are written directly without any modification, relying
solely on the protection bits in macros like PAGE_KERNEL. However,
modifying kernel pte attributes via pte_wrprotect() would be ignored by
set_pte_at(). Since pte_wrprotect() does not set PTE_RDONLY (it only
clears PTE_WRITE), the new permission is not taken into account.

This patch changes set_pte_at() to adjust the read-only permission for
kernel ptes as well. As a side effect, existing PROT_* definitions used
for kernel ioremap*() need to include PTE_DIRTY | PTE_WRITE.

(additionally, white space fix for PTE_KERNEL_ROX)

Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/pgtable.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index eaa9cabf4066..298474933ef3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -69,11 +69,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -83,7 +83,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -155,6 +155,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -165,8 +166,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 
@@ -264,13 +263,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid_user(pte)) {
-		if (!pte_special(pte) && pte_exec(pte))
-			__sync_icache_dcache(pte, addr);
+	if (pte_valid(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
+		if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+			__sync_icache_dcache(pte, addr);
 	}
 
 	/*

From 70d65587f0a82f50952cb29af133a5b6b8538611 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 9 Mar 2016 16:31:29 +0000
Subject: [PATCH 472/797] arm64: Update PTE_RDONLY in set_pte_at() for
 PROT_NONE permission

commit fdc69e7df3cb24f18a93192641786e5b7ecd1dfe upstream.

The set_pte_at() function must update the hardware PTE_RDONLY bit
depending on the state of the PTE_WRITE and PTE_DIRTY bits of the given
entry value. However, it currently only performs this for pte_valid()
entries, ignoring PTE_PROT_NONE. The side-effect is that PROT_NONE
mappings would not have the PTE_RDONLY bit set. Without
CONFIG_ARM64_HW_AFDBM, this is not an issue since such PROT_NONE pages
are not accessible anyway.

With commit 2f4b829c625e ("arm64: Add support for hardware updates of
the access and dirty pte bits"), the ptep_set_wrprotect() function was
re-written to cope with automatic hardware updates of the dirty state.
As an optimisation, only PTE_RDONLY is checked to assess the "dirty"
status. Since set_pte_at() does not set this bit for PROT_NONE mappings,
such pages may be considered "dirty" as a result of
ptep_set_wrprotect().

This patch updates the pte_valid() check to pte_present() in
set_pte_at(). It also adds PTE_PROT_NONE to the swap entry bits comment.

Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
Tested-by: Ganapatrao Kulkarni <gkulkarni@cavium.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/pgtable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 298474933ef3..c63868ae9a4a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -263,7 +263,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid(pte)) {
+	if (pte_present(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
@@ -640,6 +640,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
  *	bits 0-1:	present (must be zero)
  *	bits 2-7:	swap type
  *	bits 8-57:	swap offset
+ *	bit  58:	PTE_PROT_NONE (must be zero)
  */
 #define __SWP_TYPE_SHIFT	2
 #define __SWP_TYPE_BITS		6

From 27b3cc048a5275c53e26c15ffcab3fcf9a03cda0 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Thu, 21 Apr 2016 00:27:04 -0600
Subject: [PATCH 473/797] x86/mm/xen: Suppress hugetlbfs in PV guests

commit 103f6112f253017d7062cd74d17f4a514ed4485c upstream.

Huge pages are not normally available to PV guests. Not suppressing
hugetlbfs use results in an endless loop of page faults when user mode
code tries to access a hugetlbfs mapped area (since the hypervisor
denies such PTEs to be created, but error indications can't be
propagated out of xen_set_pte_at(), just like for various of its
siblings), and - once killed in an oops like this:

  kernel BUG at .../fs/hugetlbfs/inode.c:428!
  invalid opcode: 0000 [#1] SMP
  ...
  RIP: e030:[<ffffffff811c333b>]  [<ffffffff811c333b>] remove_inode_hugepages+0x25b/0x320
  ...
  Call Trace:
   [<ffffffff811c3415>] hugetlbfs_evict_inode+0x15/0x40
   [<ffffffff81167b3d>] evict+0xbd/0x1b0
   [<ffffffff8116514a>] __dentry_kill+0x19a/0x1f0
   [<ffffffff81165b0e>] dput+0x1fe/0x220
   [<ffffffff81150535>] __fput+0x155/0x200
   [<ffffffff81079fc0>] task_work_run+0x60/0xa0
   [<ffffffff81063510>] do_exit+0x160/0x400
   [<ffffffff810637eb>] do_group_exit+0x3b/0xa0
   [<ffffffff8106e8bd>] get_signal+0x1ed/0x470
   [<ffffffff8100f854>] do_signal+0x14/0x110
   [<ffffffff810030e9>] prepare_exit_to_usermode+0xe9/0xf0
   [<ffffffff814178a5>] retint_user+0x8/0x13

This is CVE-2016-3961 / XSA-174.

Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <JGross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: xen-devel <xen-devel@lists.xenproject.org>
Link: http://lkml.kernel.org/r/57188ED802000078000E431C@prv-mh.provo.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/hugetlb.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index f8a29d2c97b0..e6a8613fbfb0 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,6 +4,7 @@
 #include <asm/page.h>
 #include <asm-generic/hugetlb.h>
 
+#define hugepages_supported() cpu_has_pse
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,

From 5582eb00f5b2362234cccf542232101db61ffc8b Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 14 Apr 2016 10:21:52 -0700
Subject: [PATCH 474/797] x86 EDAC, sb_edac.c: Repair damage introduced when
 "fixing" channel address

commit ff15e95c82768d589957dbb17d7eb7dba7904659 upstream.

In commit:

  eb1af3b71f9d ("Fix computation of channel address")

I switched the "sck_way" variable from holding the log2 value read
from the h/w to instead be the actual number. Unfortunately it
is needed in log2 form when used to shift the address.

Tested-by: Patrick Geary <patrickg@supermicro.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac@vger.kernel.org
Fixes: eb1af3b71f9d ("Fix computation of channel address")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/sb_edac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index cbee3179ec08..90c3fe99c786 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1396,7 +1396,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	}
 
 	ch_way = TAD_CH(reg) + 1;
-	sck_way = 1 << TAD_SOCK(reg);
+	sck_way = TAD_SOCK(reg);
 
 	if (ch_way == 3)
 		idx = addr >> 6;
@@ -1435,7 +1435,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		switch(ch_way) {
 		case 2:
 		case 4:
-			sck_xch = 1 << sck_way * (ch_way >> 1);
+			sck_xch = (1 << sck_way) * (ch_way >> 1);
 			break;
 		default:
 			sprintf(msg, "Invalid mirror set. Can't decode addr");
@@ -1471,7 +1471,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 
 	ch_addr = addr - offset;
 	ch_addr >>= (6 + shiftup);
-	ch_addr /= ch_way * sck_way;
+	ch_addr /= sck_xch;
 	ch_addr <<= (6 + shiftup);
 	ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
 

From 66b7be5743d88c9b8fa69e5ba7d06d33d14de8c7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Apr 2016 09:39:41 +0200
Subject: [PATCH 475/797] ALSA: hda - Don't trust the reported actual power
 state

commit 50fd4987c4f3c3ebf0ce94d932732011bbdc7c71 upstream.

We've got a regression report that the recording on Mac with a cirrus
codec doesn't work any longer.  This turned out to be the missing
power up to D0 by power_save_node enablement.

After analyzing the traces, we found out that the culprit is that the
codec advertises the "actual" power state of a few nodes to be D0
while the "target" power state is D3.  This inconsistency is usually
OK, as it implies the power transition.  But in the case of cirrus
codec, this seems to be stuck to D3 while it's not actually D0.

This patch addresses the issue by checking the power state difference
more strictly.  It sends the power-state change verb unless both the
target and the actual power states show the given value.

We may introduce yet another flag indicating the possible broken
hardware power state, but it's anyway safer to set the proper power
state even in a transition (at least it's harmless as long as the
target state is same).  So this simpler change was applied now.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_generic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 5c4fa8eba1d0..367dbf0d285e 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -843,7 +843,7 @@ static hda_nid_t path_power_update(struct hda_codec *codec,
 				   bool allow_powerdown)
 {
 	hda_nid_t nid, changed = 0;
-	int i, state;
+	int i, state, power;
 
 	for (i = 0; i < path->depth; i++) {
 		nid = path->path[i];
@@ -855,7 +855,9 @@ static hda_nid_t path_power_update(struct hda_codec *codec,
 			state = AC_PWRST_D0;
 		else
 			state = AC_PWRST_D3;
-		if (!snd_hda_check_power_state(codec, nid, state)) {
+		power = snd_hda_codec_read(codec, nid, 0,
+					   AC_VERB_GET_POWER_STATE, 0);
+		if (power != (state | (state << 4))) {
 			snd_hda_codec_write(codec, nid, 0,
 					    AC_VERB_SET_POWER_STATE, state);
 			changed = nid;

From a38ae6bb1473a02c5e7501fefe2a5ff42ad736c5 Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Mon, 18 Apr 2016 11:10:42 +0200
Subject: [PATCH 476/797] ALSA: hda/realtek - Add ALC3234 headset mode for
 Optiplex 9020m

commit afecb146d8d8a60a1dde9cdf570c278649617fde upstream.

The Optiplex 9020m with Haswell-DT processor needs a quirk for the
headset jack at the front of the machine to be able to use microphones.

A quirk for this model was originally added in 3127899, but c77900e
removed it in favour of a more generic version.

Unfortunately, pin configurations can change based on firmware/BIOS
versions, and the generic version doesn't have any effect on newer
versions of the machine/firmware anymore.

With help from David Henningsson <diwic@ubuntu.com>

Signed-off-by: Bastien Nocera <hadess@hadess.net>
Tested-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1402ba954b3d..810bceee4fd2 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5449,6 +5449,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13),
+	SND_PCI_QUIRK(0x1028, 0x0669, "Dell Optiplex 9020m", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),

From 0df9987a2ec6bc37440b9c4fa176f360039a8b8e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 19 Apr 2016 22:07:50 +0200
Subject: [PATCH 477/797] ALSA: hda - Keep powering up ADCs on Cirrus codecs

commit de3df8a986b635082a1d94bae2c361d043c57106 upstream.

Although one weird behavior about the input path (inconsistent D0/D3
switch) on Cirrus CS420x codecs was fixed in the previous commit,
there is still an issue on some Mac machines: the capture stream
stalls when switching the ADCs on the fly.  More badly, this keeps
stuck until the next reboot.

The dynamic ADC switching is already a bit fragile and assuming
optimistically that the chip accepts the frequent power changes.  On
Cirrus codecs, this doesn't seem applicable.

As a quick workaround, we pin down the ADCs to keep up in D0 when
spec->dyn_adc_switch is set.  In this way, the ADCs are kept up only
for the systems that were confirmed to be broken.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_cirrus.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index a47e8ae0eb30..80bbadc83721 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -361,6 +361,7 @@ static int cs_parse_auto_config(struct hda_codec *codec)
 {
 	struct cs_spec *spec = codec->spec;
 	int err;
+	int i;
 
 	err = snd_hda_parse_pin_defcfg(codec, &spec->gen.autocfg, NULL, 0);
 	if (err < 0)
@@ -370,6 +371,19 @@ static int cs_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	/* keep the ADCs powered up when it's dynamically switchable */
+	if (spec->gen.dyn_adc_switch) {
+		unsigned int done = 0;
+		for (i = 0; i < spec->gen.input_mux.num_items; i++) {
+			int idx = spec->gen.dyn_adc_idx[i];
+			if (done & (1 << idx))
+				continue;
+			snd_hda_gen_fix_pin_power(codec,
+						  spec->gen.adc_nids[idx]);
+			done |= 1 << idx;
+		}
+	}
+
 	return 0;
 }
 

From 79dc55bd02a8dc0b6adf7598c4f8a7356594c363 Mon Sep 17 00:00:00 2001
From: "Lu, Han" <han.lu@intel.com>
Date: Wed, 20 Apr 2016 10:08:43 +0800
Subject: [PATCH 478/797] ALSA: hda - add PCI ID for Intel Broxton-T

commit 9859a971ca228725425238756ee89c6133306ec8 upstream.

Add HD Audio Device PCI ID for the Intel Broxton-T platform.
It is an HDA Intel PCH controller.

Signed-off-by: Lu, Han <han.lu@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_intel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 2ff692dd2c5f..411630e9c034 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2207,6 +2207,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Broxton-P(Apollolake) */
 	{ PCI_DEVICE(0x8086, 0x5a98),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
+	/* Broxton-T */
+	{ PCI_DEVICE(0x8086, 0x1a98),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
 	/* Haswell */
 	{ PCI_DEVICE(0x8086, 0x0a0c),
 	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },

From 519aef523513a58f958e0aa432855e7b2a57a611 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2016 17:37:54 +0200
Subject: [PATCH 479/797] ALSA: pcxhr: Fix missing mutex unlock

commit 67f3754b51f22b18c4820fb84062f658c30e8644 upstream.

The commit [9bef72bdb26e: ALSA: pcxhr: Use nonatomic PCM ops]
converted to non-atomic PCM ops, but shamelessly with an unbalanced
mutex locking, which leads to the hangup easily.  Fix it.

Fixes: 9bef72bdb26e ('ALSA: pcxhr: Use nonatomic PCM ops')
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116441
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/pcxhr/pcxhr_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c
index c5194f5b150a..d7e71f309299 100644
--- a/sound/pci/pcxhr/pcxhr_core.c
+++ b/sound/pci/pcxhr/pcxhr_core.c
@@ -1341,5 +1341,6 @@ irqreturn_t pcxhr_threaded_irq(int irq, void *dev_id)
 	}
 
 	pcxhr_msg_thread(mgr);
+	mutex_unlock(&mgr->lock);
 	return IRQ_HANDLED;
 }

From 8dd069c221e299db24ea5937c6130433109d6499 Mon Sep 17 00:00:00 2001
From: Conrad Kostecki <ck+linuxkernel@bl4ckb0x.de>
Date: Tue, 26 Apr 2016 10:08:10 +0200
Subject: [PATCH 480/797] ALSA: hda - Add dock support for ThinkPad X260

commit 037e119738120c1cdc460c6ae33871c3000531f3 upstream.

Fixes audio output on a ThinkPad X260, when using Lenovo CES 2013
docking station series (basic, pro, ultra).

Signed-off-by: Conrad Kostecki <ck+linuxkernel@bl4ckb0x.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 810bceee4fd2..ac4490a96863 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5584,6 +5584,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),

From 4fad26a279caad671287e502e9b6d2487a56b270 Mon Sep 17 00:00:00 2001
From: Romain Perier <romain.perier@free-electrons.com>
Date: Thu, 14 Apr 2016 15:36:03 +0200
Subject: [PATCH 481/797] asm-generic/futex: Re-enable preemption in
 futex_atomic_cmpxchg_inatomic()

commit fba7cd681b6155e2d93e7862fcd6f970336b83c3 upstream.

The recent decoupling of pagefault disable and preempt disable added an
explicit preempt_disable/enable() pair to the futex_atomic_cmpxchg_inatomic()
implementation in asm-generic/futex.h. But it forgot to add preempt_enable()
calls to the error handling code paths, which results in a preemption count
imbalance.

This is observable on boot when the test for atomic_cmpxchg() is calling
futex_atomic_cmpxchg_inatomic() on a NULL pointer.

Add the missing preempt_enable() calls to the error handling code paths.

[ tglx: Massaged changelog ]

Fixes: d9b9ff8c1889 ("sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly")
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Cc: linux-arch@vger.kernel.org
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1460640963-690-1-git-send-email-romain.perier@free-electrons.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/futex.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index e56272c919b5..bf2d34c9d804 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	u32 val;
 
 	preempt_disable();
-	if (unlikely(get_user(val, uaddr) != 0))
+	if (unlikely(get_user(val, uaddr) != 0)) {
+		preempt_enable();
 		return -EFAULT;
+	}
 
-	if (val == oldval && unlikely(put_user(newval, uaddr) != 0))
+	if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
+		preempt_enable();
 		return -EFAULT;
+	}
 
 	*uval = val;
 	preempt_enable();

From 61fc0ae42c498f8eb782733065d93da6817d28b4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Apr 2016 14:35:39 +0200
Subject: [PATCH 482/797] futex: Handle unlock_pi race gracefully

commit 89e9e66ba1b3bde9d8ea90566c2aee20697ad681 upstream.

If userspace calls UNLOCK_PI unconditionally without trying the TID -> 0
transition in user space first then the user space value might not have the
waiters bit set. This opens the following race:

CPU0	    	      	    CPU1
uval = get_user(futex)
			    lock(hb)
lock(hb)
			    futex |= FUTEX_WAITERS
			    ....
			    unlock(hb)

cmpxchg(futex, uval, newval)

So the cmpxchg fails and returns -EINVAL to user space, which is wrong because
the futex value is valid.

To handle this (yes, yet another) corner case gracefully, check for a flag
change and retry.

[ tglx: Massaged changelog and slightly reworked implementation ]

Fixes: ccf9e6a80d9e ("futex: Make unlock_pi more robust")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1460723739-5195-1-git-send-email-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/futex.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 461c72b2dac2..eaa3a8dfd345 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1244,10 +1244,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	if (unlikely(should_fail_futex(true)))
 		ret = -EFAULT;
 
-	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
 		ret = -EFAULT;
-	else if (curval != uval)
-		ret = -EINVAL;
+	} else if (curval != uval) {
+		/*
+		 * If a unconditional UNLOCK_PI operation (user space did not
+		 * try the TID->0 transition) raced with a waiter setting the
+		 * FUTEX_WAITERS flag between get_user() and locking the hash
+		 * bucket lock, retry the operation.
+		 */
+		if ((FUTEX_TID_MASK & curval) == uval)
+			ret = -EAGAIN;
+		else
+			ret = -EINVAL;
+	}
 	if (ret) {
 		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 		return ret;
@@ -2537,6 +2547,15 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 		 */
 		if (ret == -EFAULT)
 			goto pi_faulted;
+		/*
+		 * A unconditional UNLOCK_PI op raced against a waiter
+		 * setting the FUTEX_WAITERS bit. Try again.
+		 */
+		if (ret == -EAGAIN) {
+			spin_unlock(&hb->lock);
+			put_futex_key(&key);
+			goto retry;
+		}
 		/*
 		 * wake_futex_pi has detected invalid state. Tell user
 		 * space.

From ad4b209d192624e8587f4988171d624346913ddd Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 20 Apr 2016 20:09:24 -0700
Subject: [PATCH 483/797] futex: Acknowledge a new waiter in counter before
 plist

commit fe1bce9e2107ba3a8faffe572483b6974201a0e6 upstream.

Otherwise an incoming waker on the dest hash bucket can miss
the waiter adding itself to the plist during the lockless
check optimization (small window but still the correct way
of doing this); similarly to the decrement counterpart.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: bigeasy@linutronix.de
Cc: dvhart@infradead.org
Link: http://lkml.kernel.org/r/1461208164-29150-1-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/futex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index eaa3a8dfd345..9d8163afd87c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1484,8 +1484,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 	if (likely(&hb1->chain != &hb2->chain)) {
 		plist_del(&q->list, &hb1->chain);
 		hb_waiters_dec(hb1);
-		plist_add(&q->list, &hb2->chain);
 		hb_waiters_inc(hb2);
+		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
 	}
 	get_futex_key_refs(key2);

From 06e38eaf1a24332b15748f33039d5bf15799c5cb Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 6 Mar 2016 16:06:06 -0500
Subject: [PATCH 484/797] drm/nouveau/core: use vzalloc for allocating ramht

commit 78a121d82da8aff3aca2a6a1c40f5061081760f0 upstream.

Most calls to nvkm_ramht_new use 0x8000 as the size. This results in a
fairly sizeable chunk of memory to be allocated, which may not be
available with kzalloc. Since this is done fairly rarely (once per
channel), use vzalloc instead.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: Sven Joachim <svenjoac@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/nvkm/core/ramht.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
index 3216e157a8a0..89da47234016 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
@@ -131,7 +131,7 @@ nvkm_ramht_del(struct nvkm_ramht **pramht)
 	struct nvkm_ramht *ramht = *pramht;
 	if (ramht) {
 		nvkm_gpuobj_del(&ramht->gpuobj);
-		kfree(*pramht);
+		vfree(*pramht);
 		*pramht = NULL;
 	}
 }
@@ -143,8 +143,8 @@ nvkm_ramht_new(struct nvkm_device *device, u32 size, u32 align,
 	struct nvkm_ramht *ramht;
 	int ret, i;
 
-	if (!(ramht = *pramht = kzalloc(sizeof(*ramht) + (size >> 3) *
-					sizeof(*ramht->data), GFP_KERNEL)))
+	if (!(ramht = *pramht = vzalloc(sizeof(*ramht) +
+					(size >> 3) * sizeof(*ramht->data))))
 		return -ENOMEM;
 
 	ramht->device = device;

From 20fd4b1bbfbea603fff1d756b39cffc67048aec5 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Wed, 18 Nov 2015 11:17:25 +0000
Subject: [PATCH 485/797] drm/qxl: fix cursor position with non-zero hotspot

commit d59a1f71ff1aeda4b4630df92d3ad4e3b1dfc885 upstream.

The SPICE protocol considers the position of a cursor to be the location
of its active pixel on the display, so the cursor is drawn with its
top-left corner at "(x - hot_spot_x, y - hot_spot_y)" but the DRM cursor
position gives the location where the top-left corner should be drawn,
with the hotspot being a hint for drivers that need it.

This fixes the location of the window resize cursors when using Fluxbox
with the QXL DRM driver and both the QXL and modesetting X drivers.

Signed-off-by: John Keeping <john@metanate.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1447845445-2116-1-git-send-email-john@metanate.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/qxl/qxl_display.c | 13 +++++++++----
 drivers/gpu/drm/qxl/qxl_drv.h     |  2 ++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 183aea1abebc..5edebf495c07 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -375,10 +375,15 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 
 	qxl_bo_kunmap(user_bo);
 
+	qcrtc->cur_x += qcrtc->hot_spot_x - hot_x;
+	qcrtc->cur_y += qcrtc->hot_spot_y - hot_y;
+	qcrtc->hot_spot_x = hot_x;
+	qcrtc->hot_spot_y = hot_y;
+
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_SET;
-	cmd->u.set.position.x = qcrtc->cur_x;
-	cmd->u.set.position.y = qcrtc->cur_y;
+	cmd->u.set.position.x = qcrtc->cur_x + qcrtc->hot_spot_x;
+	cmd->u.set.position.y = qcrtc->cur_y + qcrtc->hot_spot_y;
 
 	cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0);
 
@@ -441,8 +446,8 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc,
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_MOVE;
-	cmd->u.position.x = qcrtc->cur_x;
-	cmd->u.position.y = qcrtc->cur_y;
+	cmd->u.position.x = qcrtc->cur_x + qcrtc->hot_spot_x;
+	cmd->u.position.y = qcrtc->cur_y + qcrtc->hot_spot_y;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 01a86948eb8c..3ab90179e9ab 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -135,6 +135,8 @@ struct qxl_crtc {
 	int index;
 	int cur_x;
 	int cur_y;
+	int hot_spot_x;
+	int hot_spot_y;
 };
 
 struct qxl_output {

From 67df493c5557c6f34f33c4a3d52784d7ba46312a Mon Sep 17 00:00:00 2001
From: Lyude <cpaul@redhat.com>
Date: Wed, 16 Mar 2016 15:18:04 -0400
Subject: [PATCH 486/797] drm/i915: Fix race condition in
 intel_dp_destroy_mst_connector()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9e60290dbafdf577766e5fc5f2fdb3be450cf9a6 upstream.

After unplugging a DP MST display from the system, we have to go through
and destroy all of the DRM connectors associated with it since none of
them are valid anymore. Unfortunately, intel_dp_destroy_mst_connector()
doesn't do a good enough job of ensuring that, throughout the destruction
process, no modesettings can be done with the connectors. As it is
right now, intel_dp_destroy_mst_connector() works like this:

* Take all modeset locks
* Clear the configuration of the crtc on the connector, if there is one
* Drop all modeset locks, this is required because of circular
  dependency issues that arise with trying to remove the connector from
  sysfs with modeset locks held
* Unregister the connector
* Take all modeset locks, again
* Do the rest of the required cleaning for destroying the connector
* Finally drop all modeset locks for good

This only works sometimes. During the destruction process, it's very
possible that a userspace application will attempt to do a modesetting
using the connector. When we drop the modeset locks, an ioctl handler
such as drm_mode_setcrtc has the opportunity to take all of the modeset
locks from us. When this happens, one thing leads to another and
eventually we end up committing a mode with the non-existent connector:

	[drm:intel_dp_link_training_clock_recovery [i915]] *ERROR* failed to enable link training
	[drm:intel_dp_aux_ch] dp_aux_ch timeout status 0x7cf0001f
	[drm:intel_dp_start_link_train [i915]] *ERROR* failed to start channel equalization
	[drm:intel_dp_aux_ch] dp_aux_ch timeout status 0x7cf0001f
	[drm:intel_mst_pre_enable_dp [i915]] *ERROR* failed to allocate vcpi

And in some cases, such as with the T460s using an MST dock, this
results in breaking modesetting and/or panicking the system.

To work around this, we now unregister the connector at the very
beginning of intel_dp_destroy_mst_connector(), grab all the modesetting
locks, and then hold them until we finish the rest of the function.

Signed-off-by: Lyude <cpaul@redhat.com>
Signed-off-by: Rob Clark <rclark@redhat.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1458155884-13877-1-git-send-email-cpaul@redhat.com
(cherry picked from commit 1f7717552ef1306be3b7ed28c66c6eff550e3a23)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_dp_mst.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 0639275fc471..06bd9257acdc 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -477,6 +477,8 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct drm_device *dev = connector->dev;
 
+	intel_connector->unregister(intel_connector);
+
 	/* need to nuke the connector */
 	drm_modeset_lock_all(dev);
 	if (connector->state->crtc) {
@@ -490,11 +492,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 
 		WARN(ret, "Disabling mst crtc failed with %i\n", ret);
 	}
-	drm_modeset_unlock_all(dev);
 
-	intel_connector->unregister(intel_connector);
-
-	drm_modeset_lock_all(dev);
 	intel_connector_remove_from_fbdev(intel_connector);
 	drm_connector_cleanup(connector);
 	drm_modeset_unlock_all(dev);

From 67fb098f6f23ebab7b47ae517c161032dc161cd9 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Apr 2016 11:19:19 -0400
Subject: [PATCH 487/797] Revert "drm/radeon: disable runtime pm on PX laptops
 without dGPU power control"

commit bfaddd9fc8ac048b99475f000dbef6f08297417f upstream.

This reverts commit e64c952efb8e0c15ae82cec8e455ab4910690ef1.

ATPX is the ACPI method for controlling AMD PowerXpress laptops.
There are flags to indicate which methods are supported.  If
the dGPU power down flag is not supported, the driver needs to
implement the dGPU power down manually.  We had previously
always forced the driver to assume the ATPX dGPU power down
was present, but this causes problems on boards where it is
not, leading to GPU hangs when attempting to power down the
dGPU.  Manual dGPU power down is not currently supported in
the Linux driver.  Some laptops indicate that the ATPX
dGPU power down method is not present, but it actually
apparently is.  I'm not sure if this is a bios bug and it should
be set or if there is a reason it was unset and the method should
not be used.  This is not an issue on other OSes since both the
ATPX and the manual driver power down methods are supported.

This is apparently fairly widespread, so just revert for now.

bugs:
https://bugzilla.kernel.org/show_bug.cgi?id=115321
https://bugzilla.kernel.org/show_bug.cgi?id=116581
https://bugzilla.kernel.org/show_bug.cgi?id=116251

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_atpx_handler.c | 8 ++++----
 drivers/gpu/drm/radeon/radeon_device.c       | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 9bc408c9f9f6..c4b4f298a283 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -62,10 +62,6 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
-bool radeon_has_atpx_dgpu_power_cntl(void) {
-	return radeon_atpx_priv.atpx.functions.power_cntl;
-}
-
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -145,6 +141,10 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
+	/* make sure required functions are enabled */
+	/* dGPU power control is required */
+	atpx->functions.power_cntl = true;
+
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index f78f111e68de..c566993a2ec3 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -103,12 +103,6 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };
 
-#if defined(CONFIG_VGA_SWITCHEROO)
-bool radeon_has_atpx_dgpu_power_cntl(void);
-#else
-static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
-#endif
-
 #define RADEON_PX_QUIRK_DISABLE_PX  (1 << 0)
 #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
 
@@ -1439,7 +1433,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 
-	if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl())
+	if (rdev->flags & RADEON_IS_PX)
 		runtime = true;
 	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime);
 	if (runtime)

From 54aeb5854ec03315a721268b8c207fcdcd7f298f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 25 Apr 2016 13:12:18 -0400
Subject: [PATCH 488/797] Revert "drm/amdgpu: disable runtime pm on PX laptops
 without dGPU power control"

commit e9bef455af8eb0e837e179aab8988ae2649fd8d3 upstream.

This reverts commit bedf2a65c1aa8fb29ba8527fd00c0f68ec1f55f1.

See the radeon revert for an extended description.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c       | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 8ac49812a716..5a8fbadbd27b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,10 +63,6 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
-bool amdgpu_has_atpx_dgpu_power_cntl(void) {
-	return amdgpu_atpx_priv.atpx.functions.power_cntl;
-}
-
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -146,6 +142,10 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
+	/* make sure required functions are enabled */
+	/* dGPU power control is required */
+	atpx->functions.power_cntl = true;
+
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9d88023df836..c961fe093e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -61,12 +61,6 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
-#if defined(CONFIG_VGA_SWITCHEROO)
-bool amdgpu_has_atpx_dgpu_power_cntl(void);
-#else
-static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
-#endif
-
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -1475,7 +1469,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	if (amdgpu_runtime_pm == 1)
 		runtime = true;
-	if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
+	if (amdgpu_device_is_px(ddev))
 		runtime = true;
 	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
 	if (runtime)

From 61fe67520c4394c90f688c61c5c16dd63824cd42 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Apr 2016 19:53:59 -0700
Subject: [PATCH 489/797] cpufreq: intel_pstate: Fix processing for turbo
 activation ratio

commit 1becf03545a0859ceaaf9e8c2d9861882a71cb01 upstream.

When the config TDP level is not nominal (level = 0), the MSR values for
reading level 1 and level 2 ratios contain power in low 14 bits and actual
ratio bits are at bits [23:16]. The current processing for level 1 and
level 2 is wrong as there is no shift done to get actual ratio.

Fixes: 6a35fc2d6c22 (cpufreq: intel_pstate: get P1 from TAR when available)
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/intel_pstate.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 98fb8821382d..f53b02a6bc05 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -667,6 +667,11 @@ static int core_get_max_pstate(void)
 			if (err)
 				goto skip_tar;
 
+			/* For level 1 and 2, bits[23:16] contain the ratio */
+			if (tdp_ctrl)
+				tdp_ratio >>= 16;
+
+			tdp_ratio &= 0xff; /* ratios are only 8 bits long */
 			if (tdp_ratio - 1 == tar) {
 				max_pstate = tar;
 				pr_debug("max_pstate=TAC %x\n", max_pstate);

From d22ac3a9403bc2a60662ec117dc83f72564d61f9 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 31 Mar 2016 11:48:31 +0200
Subject: [PATCH 490/797] s390/pci: add extra padding to function measurement
 block

commit 9d89d9e61d361f3adb75e1aebe4bb367faf16cfa upstream.

Newer machines might use a different (larger) format for function
measurement blocks. To ensure that we comply with the alignment
requirement on these machines and prevent memory corruption (when
firmware writes more data than we expect) add 16 padding bytes
at the end of the fmb.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/pci.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 2b2ced9dc00a..6dafabb6ae1a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -45,7 +45,8 @@ struct zpci_fmb {
 	u64 rpcit_ops;
 	u64 dma_rbytes;
 	u64 dma_wbytes;
-} __packed __aligned(64);
+	u64 pad[2];
+} __packed __aligned(128);
 
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,

From 0dec867402c0ce4eee7ca0055a99d634fe32a72b Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 10 Mar 2016 13:07:17 +0200
Subject: [PATCH 491/797] iwlwifi: pcie: lower the debug level for RSA
 semaphore access

commit 9fc515bc9e735c10cd327f05c20f5ef69474188d upstream.

IWL_INFO is not an error but still printed by default.
"can't access the RSA semaphore it is write protected" seems
worrisome but it is not really a problem.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/pcie/trans.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index 8c7204738aa3..00e0332e2544 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -731,8 +731,8 @@ static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans)
 	 */
 	val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0);
 	if (val & (BIT(1) | BIT(17))) {
-		IWL_INFO(trans,
-			 "can't access the RSA semaphore it is write protected\n");
+		IWL_DEBUG_INFO(trans,
+			       "can't access the RSA semaphore it is write protected\n");
 		return 0;
 	}
 

From 72b847aa95584d9e0718c9e3ee38a627bbb24c17 Mon Sep 17 00:00:00 2001
From: Matti Gottlieb <matti.gottlieb@intel.com>
Date: Tue, 15 Mar 2016 13:46:47 +0200
Subject: [PATCH 492/797] iwlwifi: mvm: fix memory leak in paging

commit 7fdf9663261cc77a516396fec82cee8a8ea07e76 upstream.

Currently paging download buffer is freed during
the unloading of the opmode which happens when the driver
is unloaded.

This causes a memory leak since the paging download
buffer is allocated every time we enable the
interface, so the download buffer can be allocated many
times, but only be freed once.

Free paging download buffer during disabling of the
interface.

Signed-off-by: Matti Gottlieb <matti.gottlieb@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 ++
 drivers/net/wireless/iwlwifi/mvm/ops.c      | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index e88afac51c5d..f96ab2f4b90e 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -1557,6 +1557,8 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
 	/* the fw is stopped, the aux sta is dead: clean up driver state */
 	iwl_mvm_del_aux_sta(mvm);
 
+	iwl_free_fw_paging(mvm);
+
 	/*
 	 * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete()
 	 * won't be called in this case).
diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index c3adf2bcdc85..13c97f665ba8 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c
@@ -645,8 +645,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode)
 	for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++)
 		kfree(mvm->nvm_sections[i].data);
 
-	iwl_free_fw_paging(mvm);
-
 	iwl_mvm_tof_clean(mvm);
 
 	ieee80211_free_hw(mvm->hw);

From 1575fcd167e3452cadbcf7d04ad6277c875a482f Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 13 Apr 2016 10:52:25 -0500
Subject: [PATCH 493/797] crypto: ccp - Prevent information leakage on export

commit f709b45ec461b548c41a00044dba1f1b572783bf upstream.

Prevent information from leaking to userspace by doing a memset to 0 of
the export state structure before setting the structure values and copying
it. This prevents un-initialized padding areas from being copied into the
export area.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 3 +++
 drivers/crypto/ccp/ccp-crypto-sha.c      | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 3d9acc53d247..60fc0fa26fd3 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -225,6 +225,9 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
 	struct ccp_aes_cmac_exp_ctx state;
 
+	/* Don't let anything leak to 'out' */
+	memset(&state, 0, sizeof(state));
+
 	state.null_msg = rctx->null_msg;
 	memcpy(state.iv, rctx->iv, sizeof(state.iv));
 	state.buf_count = rctx->buf_count;
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 8ef06fad8b14..ab9945f2cb7a 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -212,6 +212,9 @@ static int ccp_sha_export(struct ahash_request *req, void *out)
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
 	struct ccp_sha_exp_ctx state;
 
+	/* Don't let anything leak to 'out' */
+	memset(&state, 0, sizeof(state));
+
 	state.type = rctx->type;
 	state.msg_bits = rctx->msg_bits;
 	state.first = rctx->first;

From 9a10dfc8bf95270073c6c2e2be3de26a652d730a Mon Sep 17 00:00:00 2001
From: Xiaodong Liu <xiaodong.liu@intel.com>
Date: Tue, 12 Apr 2016 09:45:51 +0000
Subject: [PATCH 494/797] crypto: sha1-mb - use correct pointer while
 completing jobs

commit 0851561d9c965df086ef8a53f981f5f95a57c2c8 upstream.

In sha_complete_job, an incorrect mcryptd_hash_request_ctx pointer is used
when checking and completing other jobs. If the memory of the first completed
req is freed while other jobs are still being completed in the function, the
kernel will crash since a NULL pointer is assigned to RIP.

Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/crypto/sha-mb/sha1_mb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a841e9765bd6..8381c09d2870 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
 			req = cast_mcryptd_ctx_to_req(req_ctx);
 			if (irqs_disabled())
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 			else {
 				local_bh_disable();
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 				local_bh_enable();
 			}
 		}

From 0dedb763d08e6fa0da9c1ee14fb5d8522a85e846 Mon Sep 17 00:00:00 2001
From: Jonas Eymann <J.Eymann@gmx.net>
Date: Tue, 19 Apr 2016 20:33:47 +0300
Subject: [PATCH 495/797] crypto: talitos - fix crash in talitos_cra_init()

commit 89d124cb61b39900959e2839ac06b6339b6a54cb upstream.

Conversion of talitos driver to the new AEAD interface
hasn't been properly tested.

AEAD algorithms crash in talitos_cra_init as follows:

[...]
[    1.141095] talitos ffe30000.crypto: hwrng
[    1.145381] Unable to handle kernel paging request for data at address 0x00000058
[    1.152913] Faulting instruction address: 0xc02accc0
[    1.157910] Oops: Kernel access of bad area, sig: 11 [#1]
[    1.163315] SMP NR_CPUS=2 P1020 RDB
[    1.166810] Modules linked in:
[    1.169875] CPU: 0 PID: 1007 Comm: cryptomgr_test Not tainted 4.4.6 #1
[    1.176415] task: db5ec200 ti: db4d6000 task.ti: db4d6000
[    1.181821] NIP: c02accc0 LR: c02acd18 CTR: c02acd04
[    1.186793] REGS: db4d7d30 TRAP: 0300   Not tainted  (4.4.6)
[    1.192457] MSR: 00029000 <CE,EE,ME>  CR: 95009359  XER: e0000000
[    1.198585] DEAR: 00000058 ESR: 00000000
GPR00: c017bdc0 db4d7de0 db5ec200 df424b48 00000000 00000000 df424bfc db75a600
GPR08: df424b48 00000000 db75a628 db4d6000 00000149 00000000 c0044cac db5acda0
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 df424940
GPR24: df424900 00003083 00000400 c0180000 db75a640 c03e9f84 df424b40 df424b48
[    1.230978] NIP [c02accc0] talitos_cra_init+0x28/0x6c
[    1.236039] LR [c02acd18] talitos_cra_init_aead+0x14/0x28
[    1.241443] Call Trace:
[    1.243894] [db4d7de0] [c03e9f84] 0xc03e9f84 (unreliable)
[    1.249322] [db4d7df0] [c017bdc0] crypto_create_tfm+0x5c/0xf0
[    1.255083] [db4d7e10] [c017beec] crypto_alloc_tfm+0x98/0xf8
[    1.260769] [db4d7e40] [c0186a20] alg_test_aead+0x28/0xc8
[    1.266181] [db4d7e60] [c0186718] alg_test+0x260/0x2e0
[    1.271333] [db4d7ee0] [c0183860] cryptomgr_test+0x30/0x54
[    1.276843] [db4d7ef0] [c0044d80] kthread+0xd4/0xd8
[    1.281741] [db4d7f40] [c000e4a4] ret_from_kernel_thread+0x5c/0x64
[    1.287930] Instruction dump:
[    1.290902] 38600000 4e800020 81230028 7c681b78 81490010 38e9ffc0 3929ffe8 554a073e
[    1.298691] 2b8a000a 7d474f9e 812a0008 91230030 <80e90058> 39270060 7c0004ac 7cc04828

Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface")
Signed-off-by: Jonas Eymann <J.Eymann@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fix typo - replaced parameter of __crypto_ahash_alg(): s/tfm/alg
Remove checkpatch warnings.
Add commit message.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 41 ++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index b6f9f42e2985..79dff3b2dfb7 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2519,21 +2519,11 @@ struct talitos_crypto_alg {
 	struct talitos_alg_template algt;
 };
 
-static int talitos_cra_init(struct crypto_tfm *tfm)
+static int talitos_init_common(struct talitos_ctx *ctx,
+			       struct talitos_crypto_alg *talitos_alg)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct talitos_crypto_alg *talitos_alg;
-	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct talitos_private *priv;
 
-	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
-		talitos_alg = container_of(__crypto_ahash_alg(alg),
-					   struct talitos_crypto_alg,
-					   algt.alg.hash);
-	else
-		talitos_alg = container_of(alg, struct talitos_crypto_alg,
-					   algt.alg.crypto);
-
 	/* update context with ptr to dev */
 	ctx->dev = talitos_alg->dev;
 
@@ -2551,10 +2541,33 @@ static int talitos_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int talitos_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct talitos_crypto_alg *talitos_alg;
+	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
+		talitos_alg = container_of(__crypto_ahash_alg(alg),
+					   struct talitos_crypto_alg,
+					   algt.alg.hash);
+	else
+		talitos_alg = container_of(alg, struct talitos_crypto_alg,
+					   algt.alg.crypto);
+
+	return talitos_init_common(ctx, talitos_alg);
+}
+
 static int talitos_cra_init_aead(struct crypto_aead *tfm)
 {
-	talitos_cra_init(crypto_aead_tfm(tfm));
-	return 0;
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct talitos_crypto_alg *talitos_alg;
+	struct talitos_ctx *ctx = crypto_aead_ctx(tfm);
+
+	talitos_alg = container_of(alg, struct talitos_crypto_alg,
+				   algt.alg.aead);
+
+	return talitos_init_common(ctx, talitos_alg);
 }
 
 static int talitos_cra_init_ahash(struct crypto_tfm *tfm)

From cd7803563938ce36988e6bc494b8f6610c1537af Mon Sep 17 00:00:00 2001
From: Horia Geantă <horia.geanta@nxp.com>
Date: Tue, 19 Apr 2016 20:33:48 +0300
Subject: [PATCH 496/797] crypto: talitos - fix AEAD tcrypt tests

commit 340ff60ae93a5db2b2be6f38868df9a1293b6007 upstream.

After conversion to new AEAD interface, tcrypt tests fail as follows:

[...]
[    1.145414] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos
[    1.153564] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67
[    1.160041] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    1.166509] 00000020: 00 00 00 00
[...]

Fix them by providing the correct cipher in & cipher out pointers,
i.e. must skip over associated data in src and dst S/G.

While here, fix a problem with the HW S/G table index usage:
tbl_off must be updated after the pointer to the table entries is set.

Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface")
Reported-by: Jonas Eymann <J.Eymann@gmx.net>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/talitos.c | 46 ++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 79dff3b2dfb7..a04fea4d0063 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -63,6 +63,14 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr,
 		ptr->eptr = upper_32_bits(dma_addr);
 }
 
+static void copy_talitos_ptr(struct talitos_ptr *dst_ptr,
+			     struct talitos_ptr *src_ptr, bool is_sec1)
+{
+	dst_ptr->ptr = src_ptr->ptr;
+	if (!is_sec1)
+		dst_ptr->eptr = src_ptr->eptr;
+}
+
 static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len,
 			       bool is_sec1)
 {
@@ -1083,21 +1091,20 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1,
 			      (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
 							   : DMA_TO_DEVICE);
-
 	/* hmac data */
 	desc->ptr[1].len = cpu_to_be16(areq->assoclen);
 	if (sg_count > 1 &&
 	    (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0,
 					 areq->assoclen,
 					 &edesc->link_tbl[tbl_off])) > 1) {
-		tbl_off += ret;
-
 		to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off *
 			       sizeof(struct talitos_ptr), 0);
 		desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP;
 
 		dma_sync_single_for_device(dev, edesc->dma_link_tbl,
 					   edesc->dma_len, DMA_BIDIRECTIONAL);
+
+		tbl_off += ret;
 	} else {
 		to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0);
 		desc->ptr[1].j_extent = 0;
@@ -1126,11 +1133,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV)
 		sg_link_tbl_len += authsize;
 
-	if (sg_count > 1 &&
-	    (ret = sg_to_link_tbl_offset(areq->src, sg_count, areq->assoclen,
-					 sg_link_tbl_len,
-					 &edesc->link_tbl[tbl_off])) > 1) {
-		tbl_off += ret;
+	if (sg_count == 1) {
+		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) +
+			       areq->assoclen, 0);
+	} else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count,
+						areq->assoclen, sg_link_tbl_len,
+						&edesc->link_tbl[tbl_off])) >
+		   1) {
 		desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
 		to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl +
 					      tbl_off *
@@ -1138,8 +1147,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		dma_sync_single_for_device(dev, edesc->dma_link_tbl,
 					   edesc->dma_len,
 					   DMA_BIDIRECTIONAL);
-	} else
-		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0);
+		tbl_off += ret;
+	} else {
+		copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0);
+	}
 
 	/* cipher out */
 	desc->ptr[5].len = cpu_to_be16(cryptlen);
@@ -1151,11 +1162,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 
 	edesc->icv_ool = false;
 
-	if (sg_count > 1 &&
-	    (sg_count = sg_to_link_tbl_offset(areq->dst, sg_count,
+	if (sg_count == 1) {
+		to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) +
+			       areq->assoclen, 0);
+	} else if ((sg_count =
+			sg_to_link_tbl_offset(areq->dst, sg_count,
 					      areq->assoclen, cryptlen,
-					      &edesc->link_tbl[tbl_off])) >
-	    1) {
+					      &edesc->link_tbl[tbl_off])) > 1) {
 		struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
 
 		to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl +
@@ -1178,8 +1191,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 					   edesc->dma_len, DMA_BIDIRECTIONAL);
 
 		edesc->icv_ool = true;
-	} else
-		to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0);
+	} else {
+		copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0);
+	}
 
 	/* iv out */
 	map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,

From 53f3e26b3d09ae40318877b74e0d6c1af767a07f Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 15 Apr 2016 12:06:13 +1000
Subject: [PATCH 497/797] powerpc: scan_features() updates incorrect bits for
 REAL_LE

commit 6997e57d693b07289694239e52a10d2f02c3a46f upstream.

The REAL_LE feature entry in the ibm_pa_feature struct is missing an MMU
feature value, meaning all the remaining elements initialise the wrong
values.

This means instead of checking for byte 5, bit 0, we check for byte 0,
bit 0, and then we incorrectly set the CPU feature bit as well as MMU
feature bit 1 and CPU user feature bits 0 and 2 (5).

Checking byte 0 bit 0 (IBM numbering), means we're looking at the
"Memory Management Unit (MMU)" feature - ie. does the CPU have an MMU.
In practice that bit is set on all platforms which have the property.

This means we set CPU_FTR_REAL_LE always. In practice that seems not to
matter because all the modern cpus which have this property also
implement REAL_LE, and we've never needed to disable it.

We're also incorrectly setting MMU feature bit 1, which is:

  #define MMU_FTR_TYPE_8xx		0x00000002

Luckily the only place that looks for MMU_FTR_TYPE_8xx is in Book3E
code, which can't run on the same cpus as scan_features(). So this also
doesn't matter in practice.

Finally in the CPU user feature mask, we're setting bits 0 and 2. Bit 2
is not currently used, and bit 0 is:

  #define PPC_FEATURE_PPC_LE		0x00000001

Which says the CPU supports the old style "PPC Little Endian" mode.
Again this should be harmless in practice as no 64-bit CPUs implement
that mode.

Fix the code by adding the missing initialisation of the MMU feature.

Also add a comment marking CPU user feature bit 2 (0x4) as reserved. It
would be unsafe to start using it as old kernels incorrectly set it.

Fixes: 44ae3ab3358e ("powerpc: Free up some CPU feature bits by moving out MMU-related features")
Signed-off-by: Anton Blanchard <anton@samba.org>
[mpe: Flesh out changelog, add comment reserving 0x4]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/uapi/asm/cputable.h | 1 +
 arch/powerpc/kernel/prom.c               | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 43686043e297..2734c005da21 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -31,6 +31,7 @@
 #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \
 					0x00000040
 
+/* Reserved - do not use		0x00000004 */
 #define PPC_FEATURE_TRUE_LE		0x00000002
 #define PPC_FEATURE_PPC_LE		0x00000001
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7030b035905d..080c96b44a7f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -158,7 +158,7 @@ static struct ibm_pa_feature {
 	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
 	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
 	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
-	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0},
 	/*
 	 * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n),
 	 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP

From 08c9b94505bbe09ed42f658de2a4dbe274fa7468 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 15 Apr 2016 12:07:24 +1000
Subject: [PATCH 498/797] powerpc: Update cpu_user_features2 in scan_features()

commit beff82374b259d726e2625ec6c518a5f2613f0ae upstream.

scan_features() updates cpu_user_features but not cpu_user_features2.

Amongst other things, cpu_user_features2 contains the user TM feature
bits which we must keep in sync with the kernel TM feature bit.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/prom.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 080c96b44a7f..03fce77e441d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -148,23 +148,24 @@ static struct ibm_pa_feature {
 	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
 	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
 	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
+	unsigned int	cpu_user_ftrs2;	/* PPC_FEATURE2_xxx bit */
 	unsigned char	pabyte;		/* byte number in ibm,pa-features */
 	unsigned char	pabit;		/* bit number (big-endian) */
 	unsigned char	invert;		/* if 1, pa bit set => clear feature */
 } ibm_pa_features[] __initdata = {
-	{0, 0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
-	{0, 0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
-	{CPU_FTR_CTRL, 0, 0,		0, 3, 0},
-	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
-	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
-	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
-	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_MMU, 0,		0, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_FPU, 0,		0, 1, 0},
+	{CPU_FTR_CTRL, 0, 0, 0,			0, 3, 0},
+	{CPU_FTR_NOEXECUTE, 0, 0, 0,		0, 6, 0},
+	{CPU_FTR_NODSISRALIGN, 0, 0, 0,		1, 1, 1},
+	{0, MMU_FTR_CI_LARGE_PAGE, 0, 0,		1, 2, 0},
+	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
 	/*
 	 * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n),
 	 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
 	 * which is 0 if the kernel doesn't support TM.
 	 */
-	{CPU_FTR_TM_COMP, 0, 0,		22, 0, 0},
+	{CPU_FTR_TM_COMP, 0, 0, 0,		22, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -195,10 +196,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs,
 		if (bit ^ fp->invert) {
 			cur_cpu_spec->cpu_features |= fp->cpu_features;
 			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+			cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2;
 			cur_cpu_spec->mmu_features |= fp->mmu_features;
 		} else {
 			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
 			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+			cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2;
 			cur_cpu_spec->mmu_features &= ~fp->mmu_features;
 		}
 	}

From c89c3225062d64c63532c127c374ea962f336e6b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 15 Apr 2016 12:08:19 +1000
Subject: [PATCH 499/797] powerpc: Update TM user feature bits in
 scan_features()

commit 4705e02498d6d5a7ab98dfee9595cd5e91db2017 upstream.

We need to update the user TM feature bits (PPC_FEATURE2_HTM and
PPC_FEATURE2_HTM_NOSC) to mirror what we do with the kernel TM feature
bit.

At the moment, if firmware reports TM is not available we turn off
the kernel TM feature bit but leave the userspace ones on. Userspace
thinks it can execute TM instructions and it dies trying.

This (together with a QEMU patch) fixes PR KVM, which doesn't currently
support TM.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/prom.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 03fce77e441d..a15fe1d4e84a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -161,11 +161,12 @@ static struct ibm_pa_feature {
 	{0, MMU_FTR_CI_LARGE_PAGE, 0, 0,		1, 2, 0},
 	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
 	/*
-	 * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n),
-	 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
-	 * which is 0 if the kernel doesn't support TM.
+	 * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
+	 * we don't want to turn on TM here, so we use the *_COMP versions
+	 * which are 0 if the kernel doesn't support TM.
 	 */
-	{CPU_FTR_TM_COMP, 0, 0, 0,		22, 0, 0},
+	{CPU_FTR_TM_COMP, 0, 0,
+	 PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,

From 56b8eaa38b04f147a6b825a73a31b826b6051604 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <dmitrijs.ivanovs@ubnt.com>
Date: Wed, 6 Apr 2016 17:23:18 +0300
Subject: [PATCH 500/797] nl80211: check netlink protocol in socket release
 notification

commit 8f815cdde3e550e10c2736990d791f60c2ce43eb upstream.

A non-privileged user can create a netlink socket with the same port_id as
used by an existing open nl80211 netlink socket (e.g. as used by a hostapd
process) with a different protocol number.

Closing this socket will then lead to the notification going to nl80211's
socket release notification handler, and possibly cause an action such as
removing a virtual interface.

Fix this issue by checking that the netlink protocol is NETLINK_GENERIC.
Since generic netlink has no notifier chain of its own, we can't fix the
problem more generically.

Fixes: 026331c4d9b5 ("cfg80211/mac80211: allow registering for and sending action frames")
Signed-off-by: Dmitry Ivanov <dima@ubnt.com>
[rewrite commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 75b0d23ee882..5d89f13a98db 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13161,7 +13161,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 	struct wireless_dev *wdev;
 	struct cfg80211_beacon_registration *reg, *tmp;
 
-	if (state != NETLINK_URELEASE)
+	if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
 		return NOTIFY_DONE;
 
 	rcu_read_lock();

From 95415ac5786f483c7c69145ae644bc64c2240776 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <dmitrijs.ivanovs@ubnt.com>
Date: Thu, 7 Apr 2016 09:31:38 +0200
Subject: [PATCH 501/797] netlink: don't send NETLINK_URELEASE for unbound
 sockets

commit e27260203912b40751fa353d009eaa5a642c739f upstream.

All existing users of NETLINK_URELEASE use it to clean up resources that
were previously allocated to a socket via some command. As a result, no
users require getting this notification for unbound sockets.

Sending it for unbound sockets, however, is a problem because any user
(including unprivileged users) can create a socket that uses the same ID
as an existing socket. Binding this new socket will fail, but if the
NETLINK_URELEASE notification is generated for such sockets, the users
thereof will be tricked into thinking the socket that they allocated the
resources for is closed.

In the nl80211 case, this will cause destruction of virtual interfaces
that still belong to an existing hostapd process; this is the case that
Dmitry noticed. In the NFC case, it will cause a poll abort. In the case
of netlink log/queue it will cause them to stop reporting events, as if
NFULNL_CFG_CMD_UNBIND/NFQNL_CFG_CMD_UNBIND had been called.

Fix this problem by checking that the socket is bound before generating
the NETLINK_URELEASE notification.

Signed-off-by: Dmitry Ivanov <dima@ubnt.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 59651af8cc27..992b35fb8615 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1305,7 +1305,7 @@ static int netlink_release(struct socket *sock)
 
 	skb_queue_purge(&sk->sk_write_queue);
 
-	if (nlk->portid) {
+	if (nlk->portid && nlk->bound) {
 		struct netlink_notify n = {
 						.net = sock_net(sk),
 						.protocol = sk->sk_protocol,

From 197b6c5f0d976420c3eeacc7589ebc5869d2d70f Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Thu, 31 Mar 2016 10:53:42 -0700
Subject: [PATCH 502/797] Input: gtco - fix crash on detecting device without
 endpoints

commit 162f98dea487206d9ab79fc12ed64700667a894d upstream.

The gtco driver expects at least one valid endpoint. If given malicious
descriptors that specify 0 for the number of endpoints, it will crash in
the probe function. Ensure there is at least one endpoint on the interface
before using it.

Also let's fix a minor coding style issue.

The full correct report of this issue can be found in the public
Red Hat Bugzilla:

https://bugzilla.redhat.com/show_bug.cgi?id=1283385

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/tablet/gtco.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index 3a7f3a4a4396..7c18249d6c8e 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -858,6 +858,14 @@ static int gtco_probe(struct usb_interface *usbinterface,
 		goto err_free_buf;
 	}
 
+	/* Sanity check that a device has an endpoint */
+	if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) {
+		dev_err(&usbinterface->dev,
+			"Invalid number of endpoints\n");
+		error = -EINVAL;
+		goto err_free_urb;
+	}
+
 	/*
 	 * The endpoint is always altsetting 0, we know this since we know
 	 * this device only has one interrupt endpoint
@@ -879,7 +887,7 @@ static int gtco_probe(struct usb_interface *usbinterface,
 	 * HID report descriptor
 	 */
 	if (usb_get_extra_descriptor(usbinterface->cur_altsetting,
-				     HID_DEVICE_TYPE, &hid_desc) != 0){
+				     HID_DEVICE_TYPE, &hid_desc) != 0) {
 		dev_err(&usbinterface->dev,
 			"Can't retrieve exta USB descriptor to get hid report descriptor length\n");
 		error = -EIO;

From 506788dafb7d27c31703991f0b5f7b87bd9a942c Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Sun, 17 Apr 2016 05:21:42 -0700
Subject: [PATCH 503/797] Input: pmic8xxx-pwrkey - fix algorithm for converting
 trigger delay

commit eda5ecc0a6b865561997e177c393f0b0136fe3b7 upstream.

The trigger delay algorithm that converts from microseconds to
the register value looks incorrect. According to most of the PMIC
documentation, the equation is

	delay (Seconds) = (1 / 1024) * 2 ^ (x + 4)

except for one case where the documentation looks to have a
formatting issue and the equation looks like

	delay (Seconds) = (1 / 1024) * 2 x + 4

Most likely this driver was written with the improper
documentation to begin with. According to the downstream sources
the valid delays are from 2 seconds to 1/64 second, and the
latter equation just doesn't make sense for that. Let's fix the
algorithm and the range check to match the documentation and the
downstream sources.

Reported-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Fixes: 92d57a73e410 ("input: Add support for Qualcomm PMIC8XXX power key")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/misc/pmic8xxx-pwrkey.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c
index 3f02e0e03d12..67aab86048ad 100644
--- a/drivers/input/misc/pmic8xxx-pwrkey.c
+++ b/drivers/input/misc/pmic8xxx-pwrkey.c
@@ -353,7 +353,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev)
 	if (of_property_read_u32(pdev->dev.of_node, "debounce", &kpd_delay))
 		kpd_delay = 15625;
 
-	if (kpd_delay > 62500 || kpd_delay == 0) {
+	/* Valid range of pwr key trigger delay is 1/64 sec to 2 seconds. */
+	if (kpd_delay > USEC_PER_SEC * 2 || kpd_delay < USEC_PER_SEC / 64) {
 		dev_err(&pdev->dev, "invalid power key trigger delay\n");
 		return -EINVAL;
 	}
@@ -385,8 +386,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev)
 	pwr->name = "pmic8xxx_pwrkey";
 	pwr->phys = "pmic8xxx_pwrkey/input0";
 
-	delay = (kpd_delay << 10) / USEC_PER_SEC;
-	delay = 1 + ilog2(delay);
+	delay = (kpd_delay << 6) / USEC_PER_SEC;
+	delay = ilog2(delay);
 
 	err = regmap_read(regmap, PON_CNTL_1, &pon_cntl);
 	if (err < 0) {

From 4e8d89e8bb8828faf3c955fe9a50e1ae54918326 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Feb 2016 16:03:23 +0100
Subject: [PATCH 504/797] xen kconfig: don't "select INPUT_XEN_KBDDEV_FRONTEND"

commit 13aa38e291bdd4e4018f40dd2f75e464814dcbf3 upstream.

The Xen framebuffer driver selects the xen keyboard driver, so the latter
will be built-in if XEN_FBDEV_FRONTEND=y. However, when CONFIG_INPUT
is a loadable module, this configuration cannot work. On mainline kernels,
the symbol will be enabled but not used, while in combination with
a patch I have to detect such useless configurations, we get the
expected link failure:

drivers/input/built-in.o: In function `xenkbd_remove':
xen-kbdfront.c:(.text+0x2f0): undefined reference to `input_unregister_device'
xen-kbdfront.c:(.text+0x30e): undefined reference to `input_unregister_device'

This removes the extra "select", as it just causes more trouble than
it helps. In theory, some defconfig file might break if it has
XEN_FBDEV_FRONTEND in it but not INPUT_XEN_KBDDEV_FRONTEND. The Kconfig
fragment we ship in the kernel (kernel/configs/xen.config) however
already enables both, and anyone using an old .config file would
keep having both enabled.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: David Vrabel <david.vrabel@citrix.com>
Fixes: 36c1132e34bd ("xen kconfig: fix select INPUT_XEN_KBDDEV_FRONTEND")
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index e6d16d65e4e6..f07a0974fda2 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -2249,7 +2249,6 @@ config XEN_FBDEV_FRONTEND
 	select FB_SYS_IMAGEBLIT
 	select FB_SYS_FOPS
 	select FB_DEFERRED_IO
-	select INPUT_XEN_KBDDEV_FRONTEND if INPUT_MISC
 	select XEN_XENBUS_FRONTEND
 	default y
 	help

From c7ce82609fda7214292998e3a38901d3944b6c16 Mon Sep 17 00:00:00 2001
From: Yingjoe Chen <yingjoe.chen@mediatek.com>
Date: Sat, 2 Apr 2016 14:57:49 +0800
Subject: [PATCH 505/797] pinctrl: mediatek: correct debounce time unit in
 mtk_gpio_set_debounce

commit 5fedbb923936174ab4d1d5cc92bca1cf6b2e0ca2 upstream.

The debounce time unit for gpio_chip.set_debounce is us, but
mtk_gpio_set_debounce regards it as ms.
Fix this by correcting the debounce time array dbnc_arr so it can find the
correct debounce setting. The debounce time for the first debounce setting
is 500us; correct this as well.

While I'm at it, also change the debounce time array name to
"debounce_time" for readability.

Signed-off-by: Yingjoe Chen <yingjoe.chen@mediatek.com>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Acked-by: Hongzhou Yang <hongzhou.yang@mediatek.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 5c717275a7fa..3d8019eb3d84 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -939,7 +939,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
 	struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev);
 	int eint_num, virq, eint_offset;
 	unsigned int set_offset, bit, clr_bit, clr_offset, rst, i, unmask, dbnc;
-	static const unsigned int dbnc_arr[] = {0 , 1, 16, 32, 64, 128, 256};
+	static const unsigned int debounce_time[] = {500, 1000, 16000, 32000, 64000,
+						128000, 256000};
 	const struct mtk_desc_pin *pin;
 	struct irq_data *d;
 
@@ -957,9 +958,9 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
 	if (!mtk_eint_can_en_debounce(pctl, eint_num))
 		return -ENOSYS;
 
-	dbnc = ARRAY_SIZE(dbnc_arr);
-	for (i = 0; i < ARRAY_SIZE(dbnc_arr); i++) {
-		if (debounce <= dbnc_arr[i]) {
+	dbnc = ARRAY_SIZE(debounce_time);
+	for (i = 0; i < ARRAY_SIZE(debounce_time); i++) {
+		if (debounce <= debounce_time[i]) {
 			dbnc = i;
 			break;
 		}

From ee6a1e9eefed56308fcbd5619cb02b926b7ec630 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 14 Apr 2016 10:29:16 +0530
Subject: [PATCH 506/797] pinctrl: single: Fix pcs_parse_bits_in_pinctrl_entry
 to use __ffs than ffs

commit 56b367c0cd67d4c3006738e7dc9dda9273fd2bfe upstream.

pcs_parse_bits_in_pinctrl_entry uses ffs, which gives bit indices
ranging from 1 to MAX. This leads to a corner case where we try to request
the pin number = MAX and fail.

The bit_pos value is calculated using ffs. pin_num_from_lsb uses the
bit_pos value. The pins array is populated with:

pin + pin_num_from_lsb.

The above is 1 more than usual bit indices as bit_pos uses ffs to compute
first set bit. Hence the last of the pins array is populated with the MAX
value and not MAX - 1 which causes error when we call pin_request.

mask_pos is rightly calculated as ((pcs->fmask) << (bit_pos - 1))
Consequently val_pos and submask are correct.

Hence use __ffs which gives (ffs(x) - 1) as the first bit set.

fixes: 4e7e8017a8 ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules")
Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/pinctrl-single.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index ef04b962c3d5..23b6b8c29a99 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1273,9 +1273,9 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs,
 
 		/* Parse pins in each row from LSB */
 		while (mask) {
-			bit_pos = ffs(mask);
+			bit_pos = __ffs(mask);
 			pin_num_from_lsb = bit_pos / pcs->bits_per_pin;
-			mask_pos = ((pcs->fmask) << (bit_pos - 1));
+			mask_pos = ((pcs->fmask) << bit_pos);
 			val_pos = val & mask_pos;
 			submask = mask & mask_pos;
 
@@ -1847,7 +1847,7 @@ static int pcs_probe(struct platform_device *pdev)
 	ret = of_property_read_u32(np, "pinctrl-single,function-mask",
 				   &pcs->fmask);
 	if (!ret) {
-		pcs->fshift = ffs(pcs->fmask) - 1;
+		pcs->fshift = __ffs(pcs->fmask);
 		pcs->fmax = pcs->fmask >> pcs->fshift;
 	} else {
 		/* If mask property doesn't exist, function mux is invalid. */

From 99067b8e854211316200b3d6375a664448c2fabd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 8 Apr 2016 15:12:24 +0200
Subject: [PATCH 507/797] iommu/amd: Fix checking of pci dma aliases

commit e3156048346c28c695f5cf9db67a8cf88c90f947 upstream.

Commit 61289cb ('iommu/amd: Remove old alias handling code')
removed the old alias handling code from the AMD IOMMU
driver because this is now handled by the IOMMU core code.

But this also removed the handling of PCI aliases, which is
not handled by the core code. This caused issues with PCI
devices that have hidden PCIe-to-PCI bridges that rewrite
the request-id.

Fix this bug by re-introducing some of the removed functions
from commit 61289cbaf6c8 and adding an alias field to
'struct iommu_dev_data'. This field carries the return value
of the get_alias() function, which is used instead of the
amd_iommu_alias_table[] array in the code.

Fixes: 61289cbaf6c8 ('iommu/amd: Remove old alias handling code')
Tested-by: Tomasz Golinski <tomaszg@math.uwb.edu.pl>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/amd_iommu.c | 87 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 76 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fc836f523afa..b9319b76a8a1 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -91,6 +91,7 @@ struct iommu_dev_data {
 	struct list_head dev_data_list;	  /* For global dev_data_list */
 	struct protection_domain *domain; /* Domain the device is bound to */
 	u16 devid;			  /* PCI Device ID */
+	u16 alias;			  /* Alias Device ID */
 	bool iommu_v2;			  /* Device can make use of IOMMUv2 */
 	bool passthrough;		  /* Device is identity mapped */
 	struct {
@@ -125,6 +126,13 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
 	return container_of(dom, struct protection_domain, domain);
 }
 
+static inline u16 get_device_id(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return PCI_DEVID(pdev->bus->number, pdev->devfn);
+}
+
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
@@ -162,6 +170,68 @@ static struct iommu_dev_data *search_dev_data(u16 devid)
 	return dev_data;
 }
 
+static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+	*(u16 *)data = alias;
+	return 0;
+}
+
+static u16 get_alias(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 devid, ivrs_alias, pci_alias;
+
+	devid = get_device_id(dev);
+	ivrs_alias = amd_iommu_alias_table[devid];
+	pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
+
+	if (ivrs_alias == pci_alias)
+		return ivrs_alias;
+
+	/*
+	 * DMA alias showdown
+	 *
+	 * The IVRS is fairly reliable in telling us about aliases, but it
+	 * can't know about every screwy device.  If we don't have an IVRS
+	 * reported alias, use the PCI reported alias.  In that case we may
+	 * still need to initialize the rlookup and dev_table entries if the
+	 * alias is to a non-existent device.
+	 */
+	if (ivrs_alias == devid) {
+		if (!amd_iommu_rlookup_table[pci_alias]) {
+			amd_iommu_rlookup_table[pci_alias] =
+				amd_iommu_rlookup_table[devid];
+			memcpy(amd_iommu_dev_table[pci_alias].data,
+			       amd_iommu_dev_table[devid].data,
+			       sizeof(amd_iommu_dev_table[pci_alias].data));
+		}
+
+		return pci_alias;
+	}
+
+	pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
+		"for device %s[%04x:%04x], kernel reported alias "
+		"%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
+		PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
+		PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
+		PCI_FUNC(pci_alias));
+
+	/*
+	 * If we don't have a PCI DMA alias and the IVRS alias is on the same
+	 * bus, then the IVRS table may know about a quirk that we don't.
+	 */
+	if (pci_alias == devid &&
+	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
+		pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+		pdev->dma_alias_devfn = ivrs_alias & 0xff;
+		pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
+			PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
+			dev_name(dev));
+	}
+
+	return ivrs_alias;
+}
+
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
@@ -174,13 +244,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
 	return dev_data;
 }
 
-static inline u16 get_device_id(struct device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-
-	return PCI_DEVID(pdev->bus->number, pdev->devfn);
-}
-
 static struct iommu_dev_data *get_dev_data(struct device *dev)
 {
 	return dev->archdata.iommu;
@@ -308,6 +371,8 @@ static int iommu_init_device(struct device *dev)
 	if (!dev_data)
 		return -ENOMEM;
 
+	dev_data->alias = get_alias(dev);
+
 	if (pci_iommuv2_capable(pdev)) {
 		struct amd_iommu *iommu;
 
@@ -328,7 +393,7 @@ static void iommu_ignore_device(struct device *dev)
 	u16 devid, alias;
 
 	devid = get_device_id(dev);
-	alias = amd_iommu_alias_table[devid];
+	alias = get_alias(dev);
 
 	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
 	memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
@@ -1017,7 +1082,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
 	int ret;
 
 	iommu = amd_iommu_rlookup_table[dev_data->devid];
-	alias = amd_iommu_alias_table[dev_data->devid];
+	alias = dev_data->alias;
 
 	ret = iommu_flush_dte(iommu, dev_data->devid);
 	if (!ret && alias != dev_data->devid)
@@ -1891,7 +1956,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 	bool ats;
 
 	iommu = amd_iommu_rlookup_table[dev_data->devid];
-	alias = amd_iommu_alias_table[dev_data->devid];
+	alias = dev_data->alias;
 	ats   = dev_data->ats.enabled;
 
 	/* Update data structures */
@@ -1925,7 +1990,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
 		return;
 
 	iommu = amd_iommu_rlookup_table[dev_data->devid];
-	alias = amd_iommu_alias_table[dev_data->devid];
+	alias = dev_data->alias;
 
 	/* decrease reference counters */
 	dev_data->domain->dev_iommu[iommu->index] -= 1;

From e05cff2aa31766746f02c932e11b6b2ae357464c Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 10 Mar 2016 19:28:12 +0000
Subject: [PATCH 508/797] iommu/dma: Restore scatterlist offsets correctly

commit 07b48ac4bbe527e68cfc555f2b2b206908437141 upstream.

With the change to stashing just the IOVA-page-aligned remainder of the
CPU-page offset rather than the whole thing, the failure path in
__invalidate_sg() also needs tweaking to account for that in the case of
differing page sizes where the two offsets may not be equivalent.
Similarly in __finalise_sg(), lest the architecture-specific wrappers
later get the wrong address for cache maintenance on sync or unmap.

Fixes: 164afb1d85b8 ("iommu/dma: Use correct offset in map_sg")
Reported-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/dma-iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 72d6182666cb..58f2fe687a24 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -403,7 +403,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
 		unsigned int s_length = sg_dma_len(s);
 		unsigned int s_dma_len = s->length;
 
-		s->offset = s_offset;
+		s->offset += s_offset;
 		s->length = s_length;
 		sg_dma_address(s) = dma_addr + s_offset;
 		dma_addr += s_dma_len;
@@ -422,7 +422,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
 
 	for_each_sg(sg, s, nents, i) {
 		if (sg_dma_address(s) != DMA_ERROR_CODE)
-			s->offset = sg_dma_address(s);
+			s->offset += sg_dma_address(s);
 		if (sg_dma_len(s))
 			s->length = sg_dma_len(s);
 		sg_dma_address(s) = DMA_ERROR_CODE;

From b7cf6750c05ac80df28d1d66ecd949011f7e0d4b Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 12 Apr 2016 19:25:52 +0800
Subject: [PATCH 509/797] drm/amdgpu: when suspending, if uvd/vce was running.
 need to cancel delay work.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 85cc88f02eb0ecf44493c1b2ebb6f206cd5fc321 upstream.

Fix the issue that, after resuming, uvd/vce
dpm was disabled and uvd/vce's performance
dropped.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 53f987aeeacf..0d016ce541c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -273,6 +273,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 	memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
 		(adev->uvd.fw->size) - offset);
 
+	cancel_delayed_work_sync(&adev->uvd.idle_work);
+
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
 	size -= le32_to_cpu(hdr->ucode_size_bytes);
 	ptr = adev->uvd.cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a745eeeb5d82..bb0da76051a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -220,6 +220,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
 	if (i == AMDGPU_MAX_VCE_HANDLES)
 		return 0;
 
+	cancel_delayed_work_sync(&adev->vce.idle_work);
 	/* TODO: suspending running encoding sessions isn't supported */
 	return -EINVAL;
 }

From 57c17683f013be3aca2bf937516da9169e3b6727 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Apr 2016 18:09:57 -0400
Subject: [PATCH 510/797] drm/amdgpu: use defines for CRTCs and AFMT blocks

commit 3ea25f858fd5aeee888059952bbb8e910541eebb upstream.

Prerequisite for the next patch which ups the limits.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 064ebb347074..92f8b7bf7c64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -308,8 +308,8 @@ struct amdgpu_mode_info {
 	struct atom_context *atom_context;
 	struct card_info *atom_card_info;
 	bool mode_config_initialized;
-	struct amdgpu_crtc *crtcs[6];
-	struct amdgpu_afmt *afmt[7];
+	struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
+	struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
 	/* DVI-I properties */
 	struct drm_property *coherent_mode_property;
 	/* DAC enable load detect */

From 25d1be8d9fbc1a1a479483c29345f578687c478a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Apr 2016 18:25:34 -0400
Subject: [PATCH 511/797] drm/amdgpu: bump the afmt limit for CZ, ST, Polaris

commit 83c5cda2ccf40a7a7e4bb674321509b346e23d5a upstream.

Fixes array overflow on these chips.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 92f8b7bf7c64..89df7871653d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -52,7 +52,7 @@ struct amdgpu_hpd;
 
 #define AMDGPU_MAX_HPD_PINS 6
 #define AMDGPU_MAX_CRTCS 6
-#define AMDGPU_MAX_AFMT_BLOCKS 7
+#define AMDGPU_MAX_AFMT_BLOCKS 9
 
 enum amdgpu_rmx_type {
 	RMX_OFF,

From fe98d45db9b84d436284ed156e4f5c2f78bb7999 Mon Sep 17 00:00:00 2001
From: Sonny Jiang <sonny.jiang@amd.com>
Date: Mon, 18 Apr 2016 16:05:04 -0400
Subject: [PATCH 512/797] amdgpu/uvd: add uvd fw version for amdgpu

commit 562e2689baebaa2ac25b7ec934385480ed1cb7d6 upstream.

Was previously always hardcoded to 0.

Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bb1099c549df..053fc2f465df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1673,6 +1673,7 @@ struct amdgpu_uvd {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
+	unsigned		fw_version;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e23843f4d877..4488e82f87b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			fw_info.feature = adev->vce.fb_version;
 			break;
 		case AMDGPU_INFO_FW_UVD:
-			fw_info.ver = 0;
+			fw_info.ver = adev->uvd.fw_version;
 			fw_info.feature = 0;
 			break;
 		case AMDGPU_INFO_FW_GMC:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 0d016ce541c2..3b35ad83867c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -156,6 +156,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
 		version_major, version_minor, family_id);
 
+	adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
+				(family_id << 8));
+
 	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
 		 +  AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE;
 	r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,

From 038cf9c1977ed3bd71b107bbabe72cff8e5bb12f Mon Sep 17 00:00:00 2001
From: Grigori Goronzy <greg@chown.ath.cx>
Date: Tue, 22 Mar 2016 15:48:18 -0400
Subject: [PATCH 513/797] drm/amdgpu: fix regression on CIK (v2)

This fix was written against drm-next, but when it was
backported to 4.5 as a stable fix, the driver internal
structure change was missed.  Fix that up here to avoid
a hang due to waiting for the wrong sequence number.

v2: agd: fix up commit message

Signed-off-by: Grigori Goronzy <greg@chown.ath.cx>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index aa491540ba85..946300764609 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3628,7 +3628,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t seq = ring->fence_drv.sync_seq;
+	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
 	uint64_t addr = ring->fence_drv.gpu_addr;
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));

From a37564fa5a495a9e3f59fb648315ad33085476cd Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 14 Apr 2016 14:15:16 -0400
Subject: [PATCH 514/797] drm/radeon: add a quirk for a XFX R9 270X

commit bcb31eba4a4ea356fd61cbd5dec5511c3883f57e upstream.

bug:
https://bugs.freedesktop.org/show_bug.cgi?id=76490

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/si_dpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 7285adb27099..caa73de584a5 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2931,6 +2931,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1682, 0x9275, 0, 120000 },
 	{ 0, 0, 0, 0 },
 };
 

From 2ae4d4093977f2f29af5c92d6e0627eca3a97e20 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 13 Apr 2016 12:08:27 -0400
Subject: [PATCH 515/797] drm/radeon: fix initial connector audio value

commit 7403c515c49c033fec33df0814fffdc977e6acdc upstream.

This got lost somewhere along the way.  This fixes
audio not working until set_property was called.

Noticed-by: Hyungwon Hwang <hyungwon.hwang7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 340f3f549f29..9cfc1c3e1965 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1996,10 +1996,12 @@ radeon_add_atom_connector(struct drm_device *dev,
 						   rdev->mode_info.dither_property,
 						   RADEON_FMT_DITHER_DISABLE);
 
-			if (radeon_audio != 0)
+			if (radeon_audio != 0) {
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
+			}
 			if (ASIC_IS_DCE5(rdev))
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.output_csc_property,
@@ -2124,6 +2126,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
 			}
 			if (connector_type == DRM_MODE_CONNECTOR_DVII) {
 				radeon_connector->dac_load_detect = true;
@@ -2179,6 +2182,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
 			}
 			if (ASIC_IS_DCE5(rdev))
 				drm_object_attach_property(&radeon_connector->base.base,
@@ -2231,6 +2235,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
 			}
 			if (ASIC_IS_DCE5(rdev))
 				drm_object_attach_property(&radeon_connector->base.base,

From 7946284184695f2ba338b5c8c45a40f0b732fb2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= <jglisse@redhat.com>
Date: Tue, 19 Apr 2016 09:07:50 -0400
Subject: [PATCH 516/797] drm/radeon: forbid mapping of userptr bo through
 radeon device file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b5dcec693f87cb8475f2291c0075b2422addd3d6 upstream.

Allowing mapping of userptr bos, which are basically a list of pages from
some vma (so either anonymous pages or file backed pages), would lead to
serious corruption of kernel structures and counters (because we overwrite
the page->mapping field when mapping buffer).

This will already block if the buffer was populated before anyone does
try to mmap it because then TTM_PAGE_FLAG_SG would be set in the
ttm_tt flags. But that flag is checked before ttm_tt_populate in the ttm
vm fault handler.

So to be safe just add a check to verify_access() callback.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e06ac546a90f..f342aad79cc6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -235,6 +235,8 @@ static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 	struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo);
 
+	if (radeon_ttm_tt_has_userptr(bo->ttm))
+		return -EPERM;
 	return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 }
 

From 4b6b0008f9dd3f18ac1d42c28e25dc09715ebc66 Mon Sep 17 00:00:00 2001
From: Vitaly Prosyak <vitaly.prosyak@amd.com>
Date: Thu, 14 Apr 2016 13:34:03 -0400
Subject: [PATCH 517/797] drm/radeon: fix vertical bars appear on monitor (v2)

commit 5d5b7803c49bbb01bdf4c6e95e8314d0515b9484 upstream.

When crtc/timing is disabled on boot the dig block
should be stopped in order to ignore timing from crtc, and
the steering fifo reset, otherwise we get display
corruption or a hang in dp sst mode.

v2: agd: fix coding style

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/evergreen.c     | 154 ++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/evergreen_reg.h |  46 ++++++++
 2 files changed, 199 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 2ad462896896..32491355a1d4 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2608,10 +2608,152 @@ static void evergreen_agp_enable(struct radeon_device *rdev)
 	WREG32(VM_CONTEXT1_CNTL, 0);
 }
 
+static const unsigned ni_dig_offsets[] =
+{
+	NI_DIG0_REGISTER_OFFSET,
+	NI_DIG1_REGISTER_OFFSET,
+	NI_DIG2_REGISTER_OFFSET,
+	NI_DIG3_REGISTER_OFFSET,
+	NI_DIG4_REGISTER_OFFSET,
+	NI_DIG5_REGISTER_OFFSET
+};
+
+static const unsigned ni_tx_offsets[] =
+{
+	NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1
+};
+
+static const unsigned evergreen_dp_offsets[] =
+{
+	EVERGREEN_DP0_REGISTER_OFFSET,
+	EVERGREEN_DP1_REGISTER_OFFSET,
+	EVERGREEN_DP2_REGISTER_OFFSET,
+	EVERGREEN_DP3_REGISTER_OFFSET,
+	EVERGREEN_DP4_REGISTER_OFFSET,
+	EVERGREEN_DP5_REGISTER_OFFSET
+};
+
+
+/*
+ * Assumes EVERGREEN_CRTC_MASTER_EN is enabled for the requested crtc.
+ * We go from crtc to connector, which is not reliable since it
+ * should be the opposite direction. If the crtc is enabled then
+ * find the dig_fe which selects this crtc and ensure that it is enabled.
+ * If such a dig_fe is found then find the dig_be which selects the found
+ * dig_fe and ensure that it is enabled and in DP_SST mode.
+ * If UNIPHY_PLL_CONTROL1.enable is set then we should disconnect timing
+ * from the dp symbol clocks.
+ */
+static bool evergreen_is_dp_sst_stream_enabled(struct radeon_device *rdev,
+					       unsigned crtc_id, unsigned *ret_dig_fe)
+{
+	unsigned i;
+	unsigned dig_fe;
+	unsigned dig_be;
+	unsigned dig_en_be;
+	unsigned uniphy_pll;
+	unsigned digs_fe_selected;
+	unsigned dig_be_mode;
+	unsigned dig_fe_mask;
+	bool is_enabled = false;
+	bool found_crtc = false;
+
+	/* loop through all running dig_fe to find selected crtc */
+	for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) {
+		dig_fe = RREG32(NI_DIG_FE_CNTL + ni_dig_offsets[i]);
+		if (dig_fe & NI_DIG_FE_CNTL_SYMCLK_FE_ON &&
+		    crtc_id == NI_DIG_FE_CNTL_SOURCE_SELECT(dig_fe)) {
+			/* found running pipe */
+			found_crtc = true;
+			dig_fe_mask = 1 << i;
+			dig_fe = i;
+			break;
+		}
+	}
+
+	if (found_crtc) {
+		/* loop through all running dig_be to find selected dig_fe */
+		for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) {
+			dig_be = RREG32(NI_DIG_BE_CNTL + ni_dig_offsets[i]);
+			/* if dig_fe_selected by dig_be? */
+			digs_fe_selected = NI_DIG_BE_CNTL_FE_SOURCE_SELECT(dig_be);
+			dig_be_mode = NI_DIG_FE_CNTL_MODE(dig_be);
+			if (dig_fe_mask &  digs_fe_selected &&
+			    /* if dig_be in sst mode? */
+			    dig_be_mode == NI_DIG_BE_DPSST) {
+				dig_en_be = RREG32(NI_DIG_BE_EN_CNTL +
+						   ni_dig_offsets[i]);
+				uniphy_pll = RREG32(NI_DCIO_UNIPHY0_PLL_CONTROL1 +
+						    ni_tx_offsets[i]);
+				/* dig_be enable and tx is running */
+				if (dig_en_be & NI_DIG_BE_EN_CNTL_ENABLE &&
+				    dig_en_be & NI_DIG_BE_EN_CNTL_SYMBCLK_ON &&
+				    uniphy_pll & NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE) {
+					is_enabled = true;
+					*ret_dig_fe = dig_fe;
+					break;
+				}
+			}
+		}
+	}
+
+	return is_enabled;
+}
+
+/*
+ * Blank dig when in dp sst mode
+ * Dig ignores crtc timing
+ */
+static void evergreen_blank_dp_output(struct radeon_device *rdev,
+				      unsigned dig_fe)
+{
+	unsigned stream_ctrl;
+	unsigned fifo_ctrl;
+	unsigned counter = 0;
+
+	if (dig_fe >= ARRAY_SIZE(evergreen_dp_offsets)) {
+		DRM_ERROR("invalid dig_fe %d\n", dig_fe);
+		return;
+	}
+
+	stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+			     evergreen_dp_offsets[dig_fe]);
+	if (!(stream_ctrl & EVERGREEN_DP_VID_STREAM_CNTL_ENABLE)) {
+		DRM_ERROR("dig %d , should be enable\n", dig_fe);
+		return;
+	}
+
+	stream_ctrl &=~EVERGREEN_DP_VID_STREAM_CNTL_ENABLE;
+	WREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+	       evergreen_dp_offsets[dig_fe], stream_ctrl);
+
+	stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+			     evergreen_dp_offsets[dig_fe]);
+	while (counter < 32 && stream_ctrl & EVERGREEN_DP_VID_STREAM_STATUS) {
+		msleep(1);
+		counter++;
+		stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+				     evergreen_dp_offsets[dig_fe]);
+	}
+	if (counter >= 32 )
+		DRM_ERROR("counter exceeds %d\n", counter);
+
+	fifo_ctrl = RREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe]);
+	fifo_ctrl |= EVERGREEN_DP_STEER_FIFO_RESET;
+	WREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe], fifo_ctrl);
+
+}
+
 void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save)
 {
 	u32 crtc_enabled, tmp, frame_count, blackout;
 	int i, j;
+	unsigned dig_fe;
 
 	if (!ASIC_IS_NODCE(rdev)) {
 		save->vga_render_control = RREG32(VGA_RENDER_CONTROL);
@@ -2651,7 +2793,17 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav
 					break;
 				udelay(1);
 			}
-
+			/*we should disable dig if it drives dp sst*/
+			/*but we are in radeon_device_init and the topology is unknown*/
+			/*and it is available after radeon_modeset_init*/
+			/*the following method radeon_atom_encoder_dpms_dig*/
+			/*does the job if we initialize it properly*/
+			/*for now we do it this manually*/
+			/**/
+			if (ASIC_IS_DCE5(rdev) &&
+			    evergreen_is_dp_sst_stream_enabled(rdev, i ,&dig_fe))
+				evergreen_blank_dp_output(rdev, dig_fe);
+			/*we could remove 6 lines below*/
 			/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
 			WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 			tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]);
diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h
index aa939dfed3a3..b436badf9efa 100644
--- a/drivers/gpu/drm/radeon/evergreen_reg.h
+++ b/drivers/gpu/drm/radeon/evergreen_reg.h
@@ -250,8 +250,43 @@
 
 /* HDMI blocks at 0x7030, 0x7c30, 0x10830, 0x11430, 0x12030, 0x12c30 */
 #define EVERGREEN_HDMI_BASE				0x7030
+/*DIG block*/
+#define NI_DIG0_REGISTER_OFFSET                 (0x7000  - 0x7000)
+#define NI_DIG1_REGISTER_OFFSET                 (0x7C00  - 0x7000)
+#define NI_DIG2_REGISTER_OFFSET                 (0x10800 - 0x7000)
+#define NI_DIG3_REGISTER_OFFSET                 (0x11400 - 0x7000)
+#define NI_DIG4_REGISTER_OFFSET                 (0x12000 - 0x7000)
+#define NI_DIG5_REGISTER_OFFSET                 (0x12C00 - 0x7000)
+
+
+#define NI_DIG_FE_CNTL                               0x7000
+#       define NI_DIG_FE_CNTL_SOURCE_SELECT(x)        ((x) & 0x3)
+#       define NI_DIG_FE_CNTL_SYMCLK_FE_ON            (1<<24)
+
+
+#define NI_DIG_BE_CNTL                    0x7140
+#       define NI_DIG_BE_CNTL_FE_SOURCE_SELECT(x)     (((x) >> 8 ) & 0x3F)
+#       define NI_DIG_FE_CNTL_MODE(x)                 (((x) >> 16) & 0x7 )
+
+#define NI_DIG_BE_EN_CNTL                              0x7144
+#       define NI_DIG_BE_EN_CNTL_ENABLE               (1 << 0)
+#       define NI_DIG_BE_EN_CNTL_SYMBCLK_ON           (1 << 8)
+#       define NI_DIG_BE_DPSST 0
 
 /* Display Port block */
+#define EVERGREEN_DP0_REGISTER_OFFSET                 (0x730C  - 0x730C)
+#define EVERGREEN_DP1_REGISTER_OFFSET                 (0x7F0C  - 0x730C)
+#define EVERGREEN_DP2_REGISTER_OFFSET                 (0x10B0C - 0x730C)
+#define EVERGREEN_DP3_REGISTER_OFFSET                 (0x1170C - 0x730C)
+#define EVERGREEN_DP4_REGISTER_OFFSET                 (0x1230C - 0x730C)
+#define EVERGREEN_DP5_REGISTER_OFFSET                 (0x12F0C - 0x730C)
+
+
+#define EVERGREEN_DP_VID_STREAM_CNTL                    0x730C
+#       define EVERGREEN_DP_VID_STREAM_CNTL_ENABLE     (1 << 0)
+#       define EVERGREEN_DP_VID_STREAM_STATUS          (1 <<16)
+#define EVERGREEN_DP_STEER_FIFO                         0x7310
+#       define EVERGREEN_DP_STEER_FIFO_RESET           (1 << 0)
 #define EVERGREEN_DP_SEC_CNTL                           0x7280
 #       define EVERGREEN_DP_SEC_STREAM_ENABLE           (1 << 0)
 #       define EVERGREEN_DP_SEC_ASP_ENABLE              (1 << 4)
@@ -266,4 +301,15 @@
 #       define EVERGREEN_DP_SEC_N_BASE_MULTIPLE(x)      (((x) & 0xf) << 24)
 #       define EVERGREEN_DP_SEC_SS_EN                   (1 << 28)
 
+/*DCIO_UNIPHY block*/
+#define NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1            (0x6600  -0x6600)
+#define NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1            (0x6640  -0x6600)
+#define NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1            (0x6680 - 0x6600)
+#define NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1            (0x66C0 - 0x6600)
+#define NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1            (0x6700 - 0x6600)
+#define NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1            (0x6740 - 0x6600)
+
+#define NI_DCIO_UNIPHY0_PLL_CONTROL1                   0x6618
+#       define NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE     (1 << 0)
+
 #endif

From 7973c7c36e96d9c2afda1df9e4f4c2518cbe6588 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 19 Apr 2016 19:19:11 +0800
Subject: [PATCH 518/797] drm: Loongson-3 doesn't fully support wc memory

commit 221004c66a58949a0f25c937a6789c0839feb530 upstream.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/drm/drm_cache.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 461a0558bca4..cebecff536a3 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void)
 {
 #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
 	return false;
+#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
+	return false;
 #else
 	return true;
 #endif

From 20a32ec7ae6c46768d91c61d99228f13c2a7912b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 22 Apr 2016 10:05:21 +1000
Subject: [PATCH 519/797] drm/nouveau/gr/gf100: select a stream master to fixup
 tfb offset queries

commit 28dca90533750c7e31e8641c3df426bad9c12941 upstream.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 9f5dfc85147a..36655a74c538 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -1717,6 +1717,8 @@ gf100_gr_init(struct gf100_gr *gr)
 
 	gf100_gr_mmio(gr, gr->func->mmio);
 
+	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
+
 	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
 	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
 		do {

From e51d7655d3dcac3ea2185fec178872b11b9f03be Mon Sep 17 00:00:00 2001
From: "cpaul@redhat.com" <cpaul@redhat.com>
Date: Mon, 4 Apr 2016 19:58:47 -0400
Subject: [PATCH 520/797] drm/dp/mst: Validate port in
 drm_dp_payload_send_msg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit deba0a2af9592b2022a0bce7b085a318b53ce1db upstream.

With the joys of things running concurrently, there's always a chance
that the port we get passed in drm_dp_payload_send_msg() isn't actually
valid anymore. Because of this, we need to make sure we validate the
reference to the port before we use it otherwise we risk running into
various race conditions. For instance, on the Dell MST monitor I have
here for testing, hotplugging it enough times causes us to kernel panic:

[drm:intel_mst_enable_dp] 1
[drm:drm_dp_update_payload_part2] payload 0 1
[drm:intel_get_hpd_pins] hotplug event received, stat 0x00200000, dig 0x10101011, pins 0x00000020
[drm:intel_hpd_irq_handler] digital hpd port B - short
[drm:intel_dp_hpd_pulse] got hpd irq on port B - short
[drm:intel_dp_check_mst_status] got esi 00 10 00
[drm:drm_dp_update_payload_part2] payload 1 1
general protection fault: 0000 [#1] SMP
…
Call Trace:
 [<ffffffffa012b632>] drm_dp_update_payload_part2+0xc2/0x130 [drm_kms_helper]
 [<ffffffffa032ef08>] intel_mst_enable_dp+0xf8/0x180 [i915]
 [<ffffffffa0310dbd>] haswell_crtc_enable+0x3ed/0x8c0 [i915]
 [<ffffffffa030c84d>] intel_atomic_commit+0x5ad/0x1590 [i915]
 [<ffffffffa01db877>] ? drm_atomic_set_crtc_for_connector+0x57/0xe0 [drm]
 [<ffffffffa01dc4e7>] drm_atomic_commit+0x37/0x60 [drm]
 [<ffffffffa0130a3a>] drm_atomic_helper_set_config+0x7a/0xb0 [drm_kms_helper]
 [<ffffffffa01cc482>] drm_mode_set_config_internal+0x62/0x100 [drm]
 [<ffffffffa01d02ad>] drm_mode_setcrtc+0x3cd/0x4e0 [drm]
 [<ffffffffa01c18e3>] drm_ioctl+0x143/0x510 [drm]
 [<ffffffffa01cfee0>] ? drm_mode_setplane+0x1b0/0x1b0 [drm]
 [<ffffffff810f79a7>] ? hrtimer_start_range_ns+0x1b7/0x3a0
 [<ffffffff81212962>] do_vfs_ioctl+0x92/0x570
 [<ffffffff81590852>] ? __sys_recvmsg+0x42/0x80
 [<ffffffff81212eb9>] SyS_ioctl+0x79/0x90
 [<ffffffff816b4e32>] entry_SYSCALL_64_fastpath+0x1a/0xa4
RIP  [<ffffffffa012b026>] drm_dp_payload_send_msg+0x146/0x1f0 [drm_kms_helper]

Which occurs because of the hotplug event shown in the log, which ends
up causing DRM's dp helpers to drop the port we're updating the payload
on and panic.

Signed-off-by: Lyude <cpaul@redhat.com>
Reviewed-by: David Airlie <airlied@linux.ie>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 39d7e2e15c11..a4a3de372b69 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1665,13 +1665,19 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 	struct drm_dp_mst_branch *mstb;
 	int len, ret, port_num;
 
+	port = drm_dp_get_validated_port_ref(mgr, port);
+	if (!port)
+		return -EINVAL;
+
 	port_num = port->port_num;
 	mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent);
 	if (!mstb) {
 		mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num);
 
-		if (!mstb)
+		if (!mstb) {
+			drm_dp_put_port(port);
 			return -EINVAL;
+		}
 	}
 
 	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
@@ -1697,6 +1703,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 	kfree(txmsg);
 fail_put:
 	drm_dp_put_mst_branch_device(mstb);
+	drm_dp_put_port(port);
 	return ret;
 }
 

From 3ae01ae65df95a372451e476725ce278bec8787c Mon Sep 17 00:00:00 2001
From: Lyude <cpaul@redhat.com>
Date: Wed, 13 Apr 2016 16:50:18 -0400
Subject: [PATCH 521/797] drm/dp/mst: Restore primary hub guid on resume

commit 9dc0487d96a0396367a1451b31873482080b527f upstream.

Some hubs are forgetful, and end up forgetting whatever GUID we set
previously after we do a suspend/resume cycle. This can lead to
hotplugging breaking (along with probably other things) since the hub
will start sending connection notifications with the wrong GUID. As
such, we need to check on resume whether or not the GUID the hub is
giving us is valid.

Signed-off-by: Lyude <cpaul@redhat.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1460580618-7421-1-git-send-email-cpaul@redhat.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a4a3de372b69..04350dfa4959 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2116,6 +2116,8 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
 
 	if (mgr->mst_primary) {
 		int sret;
+		u8 guid[16];
+
 		sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, DP_RECEIVER_CAP_SIZE);
 		if (sret != DP_RECEIVER_CAP_SIZE) {
 			DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
@@ -2130,6 +2132,16 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
 			ret = -1;
 			goto out_unlock;
 		}
+
+		/* Some hubs forget their guids after they resume */
+		sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
+		if (sret != 16) {
+			DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
+			ret = -1;
+			goto out_unlock;
+		}
+		drm_dp_check_mstb_guid(mgr->mst_primary, guid);
+
 		ret = 0;
 	} else
 		ret = -1;

From 385af1d58254412e42d06b19e3cbe60b55cf34a6 Mon Sep 17 00:00:00 2001
From: "cpaul@redhat.com" <cpaul@redhat.com>
Date: Fri, 22 Apr 2016 16:08:46 -0400
Subject: [PATCH 522/797] drm/dp/mst: Get validated port ref in
 drm_dp_update_payload_part1()

commit 263efde31f97c498e1ebad30e4d2906609d7ad6b upstream.

We can thank KASAN for finding this, otherwise I probably would have spent
hours on it. This fixes a somewhat harder to trigger kernel panic, occurring
while enabling MST where the port we were currently updating the payload on
would have all of its refs dropped before we finished what we were doing:

==================================================================
BUG: KASAN: use-after-free in drm_dp_update_payload_part1+0xb3f/0xdb0 [drm_kms_helper] at addr ffff8800d29de018
Read of size 4 by task Xorg/973
=============================================================================
BUG kmalloc-2048 (Tainted: G    B   W      ): kasan: bad access detected
-----------------------------------------------------------------------------

INFO: Allocated in drm_dp_add_port+0x1aa/0x1ed0 [drm_kms_helper] age=16477 cpu=0 pid=2175
	___slab_alloc+0x472/0x490
	__slab_alloc+0x20/0x40
	kmem_cache_alloc_trace+0x151/0x190
	drm_dp_add_port+0x1aa/0x1ed0 [drm_kms_helper]
	drm_dp_send_link_address+0x526/0x960 [drm_kms_helper]
	drm_dp_check_and_send_link_address+0x1ac/0x210 [drm_kms_helper]
	drm_dp_mst_link_probe_work+0x77/0xd0 [drm_kms_helper]
	process_one_work+0x562/0x1350
	worker_thread+0xd9/0x1390
	kthread+0x1c5/0x260
	ret_from_fork+0x22/0x40
INFO: Freed in drm_dp_free_mst_port+0x50/0x60 [drm_kms_helper] age=7521 cpu=0 pid=2175
	__slab_free+0x17f/0x2d0
	kfree+0x169/0x180
	drm_dp_free_mst_port+0x50/0x60 [drm_kms_helper]
	drm_dp_destroy_connector_work+0x2b8/0x490 [drm_kms_helper]
	process_one_work+0x562/0x1350
	worker_thread+0xd9/0x1390
	kthread+0x1c5/0x260
	ret_from_fork+0x22/0x40

which on this T460s, would eventually lead to kernel panics in somewhat
random places later in intel_mst_enable_dp() if we got lucky enough.

Signed-off-by: Lyude <cpaul@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 04350dfa4959..d268bf18a662 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1786,6 +1786,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 		req_payload.start_slot = cur_slots;
 		if (mgr->proposed_vcpis[i]) {
 			port = container_of(mgr->proposed_vcpis[i], struct drm_dp_mst_port, vcpi);
+			port = drm_dp_get_validated_port_ref(mgr, port);
+			if (!port) {
+				mutex_unlock(&mgr->payload_lock);
+				return -EINVAL;
+			}
 			req_payload.num_slots = mgr->proposed_vcpis[i]->num_slots;
 		} else {
 			port = NULL;
@@ -1811,6 +1816,9 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 			mgr->payloads[i].payload_state = req_payload.payload_state;
 		}
 		cur_slots += req_payload.num_slots;
+
+		if (port)
+			drm_dp_put_port(port);
 	}
 
 	for (i = 0; i < mgr->max_payloads; i++) {

From 194de738b69315721adc4e6dbafe81c790b318c8 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Sun, 6 Mar 2016 03:21:46 +0200
Subject: [PATCH 523/797] pwm: brcmstb: Fix check of devm_ioremap_resource()
 return code

commit c5857e3f94ab2719dfac649a146cb5dd6f21fcf3 upstream.

The change fixes a potential oops while accessing iomem at an invalid
address if devm_ioremap_resource() fails for some reason.

The devm_ioremap_resource() function returns ERR_PTR() and never returns
NULL, which makes the following check for NULL useless.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Fixes: 3a9f5957020f ("pwm: Add Broadcom BCM7038 PWM controller support")
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pwm/pwm-brcmstb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c
index 423ce087cd9c..5d5adee16886 100644
--- a/drivers/pwm/pwm-brcmstb.c
+++ b/drivers/pwm/pwm-brcmstb.c
@@ -274,8 +274,8 @@ static int brcmstb_pwm_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	p->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!p->base) {
-		ret = -ENOMEM;
+	if (IS_ERR(p->base)) {
+		ret = PTR_ERR(p->base);
 		goto out_clk;
 	}
 

From 39fa719753bcef274084502c1ec8cfefc556209f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Jan 2016 20:48:32 +0200
Subject: [PATCH 524/797] drm/i915: Cleanup phys status page too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7d3fdfff23852fe458a0d0979a3555fe60f1e563 upstream.

Restore the lost phys status page cleanup.

Fixes the following splat with DMA_API_DEBUG=y:

WARNING: CPU: 0 PID: 21615 at ../lib/dma-debug.c:974 dma_debug_device_change+0x190/0x1f0()
pci 0000:00:02.0: DMA-API: device driver has pending DMA allocations while released from device [count=1]
               One of leaked entries details: [device address=0x0000000023163000] [size=4096 bytes] [mapped with DMA_BIDIRECTIONAL] [mapped as coherent]
Modules linked in: i915(-) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm sha256_generic hmac drbg ctr ccm sch_fq_codel binfmt_misc joydev mousedev arc4 ath5k iTCO_wdt mac80211 smsc_ircc2 ath snd_intel8x0m snd_intel8x0 snd_ac97_codec ac97_bus psmouse snd_pcm input_leds i2c_i801 pcspkr snd_timer cfg80211 snd soundcore i2c_core ehci_pci firewire_ohci ehci_hcd firewire_core lpc_ich 8139too rfkill crc_itu_t mfd_core mii usbcore rng_core intel_agp intel_gtt usb_common agpgart irda crc_ccitt fujitsu_laptop led_class parport_pc video parport evdev backlight
CPU: 0 PID: 21615 Comm: rmmod Tainted: G     U          4.4.0-rc4-mgm-ovl+ #4
Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26  05/10/2004
 e31a3de0 e31a3de0 e31a3d9c c128d4bd e31a3dd0 c1045a0c c15e00c4 e31a3dfc
 0000546f c15dfad2 000003ce c12b3740 000003ce c12b3740 00000000 00000001
 f61fb8a0 e31a3de8 c1045a83 00000009 e31a3de0 c15e00c4 e31a3dfc e31a3e4c
Call Trace:
 [<c128d4bd>] dump_stack+0x16/0x19
 [<c1045a0c>] warn_slowpath_common+0x8c/0xd0
 [<c12b3740>] ? dma_debug_device_change+0x190/0x1f0
 [<c12b3740>] ? dma_debug_device_change+0x190/0x1f0
 [<c1045a83>] warn_slowpath_fmt+0x33/0x40
 [<c12b3740>] dma_debug_device_change+0x190/0x1f0
 [<c1065499>] notifier_call_chain+0x59/0x70
 [<c10655af>] __blocking_notifier_call_chain+0x3f/0x80
 [<c106560f>] blocking_notifier_call_chain+0x1f/0x30
 [<c134cfb3>] __device_release_driver+0xc3/0xf0
 [<c134d0d7>] driver_detach+0x97/0xa0
 [<c134c440>] bus_remove_driver+0x40/0x90
 [<c134db18>] driver_unregister+0x28/0x60
 [<c1079e8c>] ? trace_hardirqs_on_caller+0x12c/0x1d0
 [<c12c0618>] pci_unregister_driver+0x18/0x80
 [<f83e96e7>] drm_pci_exit+0x87/0xb0 [drm]
 [<f8b3be2d>] i915_exit+0x1b/0x1ee [i915]
 [<c10b999c>] SyS_delete_module+0x14c/0x210
 [<c1079e8c>] ? trace_hardirqs_on_caller+0x12c/0x1d0
 [<c115a9bd>] ? ____fput+0xd/0x10
 [<c1002014>] do_fast_syscall_32+0xa4/0x450
 [<c149f6fa>] sysenter_past_esp+0x3b/0x5d
---[ end trace c2ecbc77760f10a0 ]---
Mapped at:
 [<c12b3183>] debug_dma_alloc_coherent+0x33/0x90
 [<f83e989c>] drm_pci_alloc+0x18c/0x1e0 [drm]
 [<f8acd59f>] intel_init_ring_buffer+0x2af/0x490 [i915]
 [<f8acd8b0>] intel_init_render_ring_buffer+0x130/0x750 [i915]
 [<f8aaea4e>] i915_gem_init_rings+0x1e/0x110 [i915]

v2: s/BUG_ON/WARN_ON/ since dim doesn't like the former anymore

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Fixes: 5c6c600 ("drm/i915: Remove DRI1 ring accessors and API")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Link: http://patchwork.freedesktop.org/patch/msgid/1452538112-5331-1-git-send-email-ville.syrjala@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f6b2a814e629..0b1015de8536 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1922,6 +1922,17 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+static void cleanup_phys_status_page(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+
+	if (!dev_priv->status_page_dmah)
+		return;
+
+	drm_pci_free(ring->dev, dev_priv->status_page_dmah);
+	ring->status_page.page_addr = NULL;
+}
+
 static void cleanup_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_object *obj;
@@ -1938,9 +1949,9 @@ static void cleanup_status_page(struct intel_engine_cs *ring)
 
 static int init_status_page(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = ring->status_page.obj;
 
-	if ((obj = ring->status_page.obj) == NULL) {
+	if (obj == NULL) {
 		unsigned flags;
 		int ret;
 
@@ -2134,7 +2145,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		if (ret)
 			goto error;
 	} else {
-		BUG_ON(ring->id != RCS);
+		WARN_ON(ring->id != RCS);
 		ret = init_phys_status_page(ring);
 		if (ret)
 			goto error;
@@ -2179,7 +2190,12 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	if (ring->cleanup)
 		ring->cleanup(ring);
 
-	cleanup_status_page(ring);
+	if (I915_NEED_GFX_HWS(ring->dev)) {
+		cleanup_status_page(ring);
+	} else {
+		WARN_ON(ring->id != RCS);
+		cleanup_phys_status_page(ring);
+	}
 
 	i915_cmd_parser_fini_ring(ring);
 	i915_gem_batch_pool_fini(&ring->batch_pool);

From 80220c4827ddde2b1c49ababa6c1ab0ad0691112 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 15 Oct 2015 17:01:58 +0300
Subject: [PATCH 525/797] drm/i915: skl_update_scaler() wants a rotation
 bitmask instead of bit number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit fa5a7970d372c9c9beb3a0ce79ee1d0c23387d0a upstream.

Pass BIT(DRM_ROTATE_0) instead of DRM_ROTATE_0 to skl_update_scaler().
The former is a mask, the latter just the bit number.

Fortunately the only thing skl_update_scaler() does with the rotation
is check if it's 90/270 degrees or not, and so in this case it would
still do the right thing.

Cc: Chandra Konduru <chandra.konduru@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1444917718-28495-1-git-send-email-ville.syrjala@linux.intel.com
Fixes: 6156a45602f9 ("drm/i915: skylake primary plane scaling using shared scalers")
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f859a5b87ed4..afa81691163d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4447,7 +4447,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
 		      intel_crtc->base.base.id, intel_crtc->pipe, SKL_CRTC_INDEX);
 
 	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-		&state->scaler_state.scaler_id, DRM_ROTATE_0,
+		&state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
 		state->pipe_src_w, state->pipe_src_h,
 		adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }

From 20d948d8b63538e5a1af20b275c4562a5a6bc470 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 11 Mar 2016 10:51:51 +0300
Subject: [PATCH 526/797] drm/amdkfd: uninitialized variable in
 dbgdev_wave_control_set_registers()

commit 93fce954427effee89e44a976299b15dd75b4bbc upstream.

At the end of the function we expect "status" to be zero, but it's
either -EINVAL or uninitialized.

Fixes: 788bf83db301 ('drm/amdkfd: Add wave control operation to debugger')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index c34c393e9aea..d5e19b5fbbfb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -513,7 +513,7 @@ static int dbgdev_wave_control_set_registers(
 				union SQ_CMD_BITS *in_reg_sq_cmd,
 				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
 {
-	int status;
+	int status = 0;
 	union SQ_CMD_BITS reg_sq_cmd;
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct HsaDbgWaveMsgAMDGen2 *pMsg;

From e0a2d244dcb6068887ef7d3c3af302fe9152298f Mon Sep 17 00:00:00 2001
From: Akash Goel <akash.goel@intel.com>
Date: Fri, 11 Mar 2016 14:56:42 +0530
Subject: [PATCH 527/797] drm/i915: Fixup the free space logic in ring_prepare

commit d43f3ebf12f59c57782ec652da65ef61c2662b40 upstream.

Currently for the case where there is enough space at the end of Ring
buffer for accommodating only the base request, the wraparound is done
immediately and as a result the base request gets added at the start
of Ring buffer. But there may not be enough free space at the beginning
to accommodate the base request, as before the wraparound, the wait was
effectively done for the reserved_size free space from the start of
Ring buffer. In such a case there is a potential of Ring buffer overflow,
the instructions at the head of Ring (ACTHD) can get overwritten.

Since the base request can fit in the remaining space, there is no need
to wraparound immediately. The wraparound will anyway happen later when
the reserved part starts getting used.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1457688402-10411-1-git-send-email-akash.goel@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 782f6bc0aba037436d6a04d19b23f8b61020a576)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 6 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d69547a65dbb..7058f75c7b42 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -776,11 +776,11 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
 		if (unlikely(total_bytes > remain_usable)) {
 			/*
 			 * The base request will fit but the reserved space
-			 * falls off the end. So only need to to wait for the
-			 * reserved size after flushing out the remainder.
+			 * falls off the end. So don't need an immediate wrap
+			 * and only need to effectively wait for the reserved
+			 * size space from the start of ringbuffer.
 			 */
 			wait_bytes = remain_actual + ringbuf->reserved_size;
-			need_wrap = true;
 		} else if (total_bytes > ringbuf->space) {
 			/* No wrapping required, just waiting. */
 			wait_bytes = total_bytes;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0b1015de8536..9d48443bca2e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2357,11 +2357,11 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 		if (unlikely(total_bytes > remain_usable)) {
 			/*
 			 * The base request will fit but the reserved space
-			 * falls off the end. So only need to to wait for the
-			 * reserved size after flushing out the remainder.
+			 * falls off the end. So don't need an immediate wrap
+			 * and only need to effectively wait for the reserved
+			 * size space from the start of ringbuffer.
 			 */
 			wait_bytes = remain_actual + ringbuf->reserved_size;
-			need_wrap = true;
 		} else if (total_bytes > ringbuf->space) {
 			/* No wrapping required, just waiting. */
 			wait_bytes = total_bytes;

From f4276d5753538996fa93a34646554bfd92f6e071 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 14 Apr 2016 14:39:02 +0300
Subject: [PATCH 528/797] drm/i915: Use fw_domains_put_with_fifo() on HSW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 31318a922395ec9e78d6e2ddf70779355afc7594 upstream.

HSW still has the wake FIFO, so let's check it.

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Deepak S <deepak.s@linux.intel.com>
Fixes: 05a2fb157e44 ("drm/i915: Consolidate forcewake code")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1460633942-24013-1-git-send-email-ville.syrjala@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
(cherry picked from commit 3d7d0c85e41afb5a05e98b3a8a72c38357f02594)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_uncore.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 43cba129a0c0..cc91ae832ffb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1132,7 +1132,11 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
 	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		dev_priv->uncore.funcs.force_wake_get =
 			fw_domains_get_with_thread_status;
-		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
+		if (IS_HASWELL(dev))
+			dev_priv->uncore.funcs.force_wake_put =
+				fw_domains_put_with_fifo;
+		else
+			dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
 	} else if (IS_IVYBRIDGE(dev)) {

From 0ea82073cb5e7039299350aba5bc135994c8cbda Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 18 Apr 2016 13:57:48 +0300
Subject: [PATCH 529/797] perf intel-pt: Fix segfault tracing transactions

commit 1342e0b7a6c1a060c593037fbac9f4b717f1cb3b upstream.

Tracing a workload that uses transactions gave a seg fault as follows:

  perf record -e intel_pt// workload
  perf report
  Program received signal SIGSEGV, Segmentation fault.
  0x000000000054b58c in intel_pt_reset_last_branch_rb (ptq=0x1a36110)
  	at util/intel-pt.c:929
  929 ptq->last_branch_rb->nr = 0;
  (gdb) p ptq->last_branch_rb
  $1 = (struct branch_stack *) 0x0
  (gdb) up
  1148 intel_pt_reset_last_branch_rb(ptq);
  (gdb) l
  1143 if (ret)
  1144 pr_err("Intel Processor Trace: failed to deliver transaction event
  1145 ret);
  1146
  1147 if (pt->synth_opts.callchain)
  1148 intel_pt_reset_last_branch_rb(ptq);
  1149
  1150 return ret;
  1151 }
  1152
  (gdb) p pt->synth_opts.callchain
  $2 = true
  (gdb)
  (gdb) bt
   #0 0x000000000054b58c in intel_pt_reset_last_branch_rb (ptq=0x1a36110)
   #1 0x000000000054c1e0 in intel_pt_synth_transaction_sample (ptq=0x1a36110)
   #2 0x000000000054c5b2 in intel_pt_sample (ptq=0x1a36110)

Caused by checking the 'callchain' flag when it should have been the
'last_branch' flag.  Fix that.

Reported-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Fixes: f14445ee72c5 ("perf intel-pt: Support generating branch stack")
Link: http://lkml.kernel.org/r/1460977068-11566-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/intel-pt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 97f963a3dcb9..9227c2f076c3 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1127,7 +1127,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
 		       ret);
 
-	if (pt->synth_opts.callchain)
+	if (pt->synth_opts.last_branch)
 		intel_pt_reset_last_branch_rb(ptq);
 
 	return ret;

From 46b9a1550e0ecf73b83c02c8435eedc01dde2055 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 13 Apr 2016 13:59:14 +1000
Subject: [PATCH 530/797] i2c: cpm: Fix build break due to incompatible pointer
 types

commit 609d5a1b2b35bb62b4b3750396e55453160c2a17 upstream.

Since commit ea8daa7b9784 ("kbuild: Add option to turn incompatible
pointer check into error"), assignments from incompatible pointer
types have become a hard error, e.g.:

  drivers/i2c/busses/i2c-cpm.c:545:91: error: passing argument 3 of
  'dma_alloc_coherent' from incompatible pointer type

Fix the build break by converting txdma & rxdma to dma_addr_t.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: ea8daa7b9784
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-cpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
index 714bdc837769..b167ab25310a 100644
--- a/drivers/i2c/busses/i2c-cpm.c
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -116,8 +116,8 @@ struct cpm_i2c {
 	cbd_t __iomem *rbase;
 	u_char *txbuf[CPM_MAXBD];
 	u_char *rxbuf[CPM_MAXBD];
-	u32 txdma[CPM_MAXBD];
-	u32 rxdma[CPM_MAXBD];
+	dma_addr_t txdma[CPM_MAXBD];
+	dma_addr_t rxdma[CPM_MAXBD];
 };
 
 static irqreturn_t cpm_i2c_interrupt(int irq, void *dev_id)

From 3b566a5c38b7311a545ac536a3b43944153918d2 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Sat, 16 Apr 2016 21:14:52 -0400
Subject: [PATCH 531/797] i2c: exynos5: Fix possible ABBA deadlock by keeping
 I2C clock prepared

commit 10ff4c5239a137abfc896ec73ef3d15a0f86a16a upstream.

The exynos5 I2C controller driver always prepares and enables a clock
before using it and then disables and unprepares it when the clock is not
used anymore.

But this can cause a possible ABBA deadlock in some scenarios since a
driver that uses regmap to access its I2C registers, will first grab
the regmap lock and then the I2C xfer function will grab the prepare
lock when preparing the I2C clock. But since the clock driver also
uses regmap for I2C accesses, preparing a clock will first grab the
prepare lock and then the regmap lock when using the regmap API.

An example of this happens on the Exynos5422 Odroid XU4 board where a
s2mps11 PMIC is used and both the s2mps11 regulators and clk drivers
share the same I2C regmap.

The possible deadlock is reported by the kernel lockdep:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(sec_core:428:(regmap)->lock);
                                lock(prepare_lock);
                                lock(sec_core:428:(regmap)->lock);
   lock(prepare_lock);

  *** DEADLOCK ***

Fix it by leaving the clock prepared on probe and using {en,dis}able in
the I2C transfer function.

This patch is similar to commit 34e81ad5f0b6 ("i2c: s3c2410: fix ABBA
deadlock by keeping clock prepared") that fixes the same bug in other
driver for an I2C controller found in Samsung SoCs.

Reported-by: Anand Moon <linux.amoon@gmail.com>
Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Reviewed-by: Anand Moon <linux.amoon@gmail.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-exynos5.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index b29c7500461a..f54ece8fce78 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -671,7 +671,9 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap,
 		return -EIO;
 	}
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_enable(i2c->clk);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < num; i++, msgs++) {
 		stop = (i == num - 1);
@@ -695,7 +697,7 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap,
 	}
 
  out:
-	clk_disable_unprepare(i2c->clk);
+	clk_disable(i2c->clk);
 	return ret;
 }
 
@@ -747,7 +749,9 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
 		return -ENOENT;
 	}
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret)
+		return ret;
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	i2c->regs = devm_ioremap_resource(&pdev->dev, mem);
@@ -799,6 +803,10 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, i2c);
 
+	clk_disable(i2c->clk);
+
+	return 0;
+
  err_clk:
 	clk_disable_unprepare(i2c->clk);
 	return ret;
@@ -810,6 +818,8 @@ static int exynos5_i2c_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&i2c->adap);
 
+	clk_unprepare(i2c->clk);
+
 	return 0;
 }
 
@@ -821,6 +831,8 @@ static int exynos5_i2c_suspend_noirq(struct device *dev)
 
 	i2c->suspended = 1;
 
+	clk_unprepare(i2c->clk);
+
 	return 0;
 }
 
@@ -830,7 +842,9 @@ static int exynos5_i2c_resume_noirq(struct device *dev)
 	struct exynos5_i2c *i2c = platform_get_drvdata(pdev);
 	int ret = 0;
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret)
+		return ret;
 
 	ret = exynos5_hsi2c_clock_setup(i2c);
 	if (ret) {
@@ -839,7 +853,7 @@ static int exynos5_i2c_resume_noirq(struct device *dev)
 	}
 
 	exynos5_i2c_init(i2c);
-	clk_disable_unprepare(i2c->clk);
+	clk_disable(i2c->clk);
 	i2c->suspended = 0;
 
 	return 0;

From 7f8150d728eef82de079ce4fc9e8b4c47aca101e Mon Sep 17 00:00:00 2001
From: Azael Avalos <coproscefalo@gmail.com>
Date: Fri, 22 Apr 2016 09:29:36 -0600
Subject: [PATCH 532/797] toshiba_acpi: Fix regression caused by hotkey
 enabling value

commit a30b8f81d9d6fe24eab8a023794548b048f08e3c upstream.

Commit 52cbae0127ad ("toshiba_acpi: Change default Hotkey enabling value")
changed the hotkeys enabling value, as it was the same value Windows uses,
however, it turns out that the value tells the EC that the driver will now
take care of the hardware events like the physical RFKill switch or the
pointing device toggle button.

This patch reverts such commit by changing the default hotkey enabling
value to 0x09, which enables hotkey events only, making the hardware
buttons working again.

Fixes bugs 113331 and 114941.

Signed-off-by: Azael Avalos <coproscefalo@gmail.com>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/toshiba_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index b0f62141ea4d..f774cb576ffa 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -131,7 +131,7 @@ MODULE_LICENSE("GPL");
 /* Field definitions */
 #define HCI_ACCEL_MASK			0x7fff
 #define HCI_HOTKEY_DISABLE		0x0b
-#define HCI_HOTKEY_ENABLE		0x01
+#define HCI_HOTKEY_ENABLE		0x09
 #define HCI_HOTKEY_SPECIAL_FUNCTIONS	0x10
 #define HCI_LCD_BRIGHTNESS_BITS		3
 #define HCI_LCD_BRIGHTNESS_SHIFT	(16-HCI_LCD_BRIGHTNESS_BITS)

From 4d32650fcd8c9097fa0f69d39f0aae80a4b7fd79 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 29 Apr 2016 15:42:25 +0200
Subject: [PATCH 533/797] EDAC: i7core, sb_edac: Don't return NOTIFY_BAD from
 mce_decoder callback

commit c4fc1956fa31003bfbe4f597e359d751568e2954 upstream.

Both of these drivers can return NOTIFY_BAD, but this terminates
processing other callbacks that were registered later on the chain.
Since the driver did nothing to log the error it seems wrong to prevent
other interested parties from seeing it. E.g. neither of them had even
bothered to check the type of the error to see if it was a memory error
before the return NOTIFY_BAD.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/72937355dd92318d2630979666063f8a2853495b.1461864507.git.tony.luck@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/i7core_edac.c | 2 +-
 drivers/edac/sb_edac.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 01087a38da22..792bdae2b91d 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1866,7 +1866,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	i7_dev = get_i7core_dev(mce->socketid);
 	if (!i7_dev)
-		return NOTIFY_BAD;
+		return NOTIFY_DONE;
 
 	mci = i7_dev->mci;
 	pvt = mci->pvt_info;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 90c3fe99c786..37649221f81c 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -2254,7 +2254,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	mci = get_mci_for_node_id(mce->socketid);
 	if (!mci)
-		return NOTIFY_BAD;
+		return NOTIFY_DONE;
 	pvt = mci->pvt_info;
 
 	/*

From 9d3e910464dbeaae0746ef29c0192caa3e0418c3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 18:07:33 +0100
Subject: [PATCH 534/797] ASoC: s3c24xx: use const snd_soc_component_driver
 pointer

commit ba4bc32eaa39ba7687f0958ae90eec94da613b46 upstream.

An older patch to convert the API in the s3c i2s driver
ended up passing a const pointer into a function that takes
a non-const pointer, so we now get a warning:

sound/soc/samsung/s3c2412-i2s.c: In function 's3c2412_iis_dev_probe':
sound/soc/samsung/s3c2412-i2s.c:172:9: error: passing argument 3 of 's3c_i2sv2_register_component' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

However, the s3c_i2sv2_register_component() function again
passes the pointer into another function taking a const, so
we just need to change its prototype.

Fixes: eca3b01d0885 ("ASoC: switch over to use snd_soc_register_component() on s3c i2s")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/samsung/s3c-i2s-v2.c | 2 +-
 sound/soc/samsung/s3c-i2s-v2.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c
index df65c5b494b1..b6ab3fc5789e 100644
--- a/sound/soc/samsung/s3c-i2s-v2.c
+++ b/sound/soc/samsung/s3c-i2s-v2.c
@@ -709,7 +709,7 @@ static int s3c2412_i2s_resume(struct snd_soc_dai *dai)
 #endif
 
 int s3c_i2sv2_register_component(struct device *dev, int id,
-			   struct snd_soc_component_driver *cmp_drv,
+			   const struct snd_soc_component_driver *cmp_drv,
 			   struct snd_soc_dai_driver *dai_drv)
 {
 	struct snd_soc_dai_ops *ops = (struct snd_soc_dai_ops *)dai_drv->ops;
diff --git a/sound/soc/samsung/s3c-i2s-v2.h b/sound/soc/samsung/s3c-i2s-v2.h
index 90abab364b49..d0684145ed1f 100644
--- a/sound/soc/samsung/s3c-i2s-v2.h
+++ b/sound/soc/samsung/s3c-i2s-v2.h
@@ -101,7 +101,7 @@ extern int s3c_i2sv2_probe(struct snd_soc_dai *dai,
  * soc core.
  */
 extern int s3c_i2sv2_register_component(struct device *dev, int id,
-					struct snd_soc_component_driver *cmp_drv,
+					const struct snd_soc_component_driver *cmp_drv,
 					struct snd_soc_dai_driver *dai_drv);
 
 #endif /* __SND_SOC_S3C24XX_S3C_I2SV2_I2S_H */

From c276b2c81f2a10f6d74e5cb1cb7d6b6c7ff85e74 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 27 Jan 2016 14:26:18 +0100
Subject: [PATCH 535/797] ASoC: ssm4567: Reset device before regcache_sync()

commit 712a8038cc24dba668afe82f0413714ca87184e0 upstream.

When the ssm4567 is powered up the driver calls regcache_sync() to restore
the register map content. regcache_sync() assumes that the device is in its
power-on reset state. Make sure that this is the case by explicitly
resetting the ssm4567 register map before calling regcache_sync() otherwise
we might end up with an incorrect register map which leads to undefined
behaviour.

One such undefined behaviour was observed when returning from system
suspend while a playback stream is active, in that case the ssm4567 was
kept muted after resume.

Fixes: 1ee44ce03011 ("ASoC: ssm4567: Add driver for Analog Devices SSM4567 amplifier")
Reported-by: Harsha Priya <harshapriya.n@intel.com>
Tested-by: Fang, Yang A <yang.a.fang@intel.com>
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/ssm4567.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c
index e619d5651b09..080c78e88e10 100644
--- a/sound/soc/codecs/ssm4567.c
+++ b/sound/soc/codecs/ssm4567.c
@@ -352,6 +352,11 @@ static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable)
 	regcache_cache_only(ssm4567->regmap, !enable);
 
 	if (enable) {
+		ret = regmap_write(ssm4567->regmap, SSM4567_REG_SOFT_RESET,
+			0x00);
+		if (ret)
+			return ret;
+
 		ret = regmap_update_bits(ssm4567->regmap,
 			SSM4567_REG_POWER_CTRL,
 			SSM4567_POWER_SPWDN, 0x00);

From 99070b6b5154f69e1f85a6547e8113b03986de7f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 18 Mar 2016 12:04:23 +0000
Subject: [PATCH 536/797] ASoC: dapm: Make sure we have a card when displaying
 component widgets

commit 47325078f2a3e543150e7df967e45756b2fff7ec upstream.

The dummy component is reused for all cards so we special case and don't
bind it to any of them.  This means that code like that displaying the
component widgets that tries to look at the card will crash.  In the
future we will fix this by ensuring that the dummy component looks like
other components but that is invasive and so not suitable for a fix.
Instead add a special case check here.

Reported-by: Harry Pan <harry.pan@intel.com>
Suggested-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-dapm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 416514fe9e63..afb70a5d4fd3 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2188,6 +2188,13 @@ static ssize_t dapm_widget_show_component(struct snd_soc_component *cmpnt,
 	int count = 0;
 	char *state = "not set";
 
+	/* card won't be set for the dummy component, as a spot fix
+	 * we're checking for that case specifically here but in future
+	 * we will ensure that the dummy component looks like others.
+	 */
+	if (!cmpnt->card)
+		return 0;
+
 	list_for_each_entry(w, &cmpnt->card->widgets, list) {
 		if (w->dapm != dapm)
 			continue;

From d74252fd2010e660b0f4b2b7bca0feccaf0214c9 Mon Sep 17 00:00:00 2001
From: Sugar Zhang <sugar.zhang@rock-chips.com>
Date: Fri, 18 Mar 2016 14:54:22 +0800
Subject: [PATCH 537/797] ASoC: rt5640: Correct the digital interface data
 select

commit 653aa4645244042826f105aab1be3d01b3d493ca upstream.

this patch corrects the interface adc/dac control register definition
according to datasheet.

Signed-off-by: Sugar Zhang <sugar.zhang@rock-chips.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/rt5640.c |  2 +-
 sound/soc/codecs/rt5640.h | 36 ++++++++++++++++++------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index f2beb1aa5763..b1c8bb39cdf1 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -359,7 +359,7 @@ static const DECLARE_TLV_DB_RANGE(bst_tlv,
 
 /* Interface data select */
 static const char * const rt5640_data_select[] = {
-	"Normal", "left copy to right", "right copy to left", "Swap"};
+	"Normal", "Swap", "left copy to right", "right copy to left"};
 
 static SOC_ENUM_SINGLE_DECL(rt5640_if1_dac_enum, RT5640_DIG_INF_DATA,
 			    RT5640_IF1_DAC_SEL_SFT, rt5640_data_select);
diff --git a/sound/soc/codecs/rt5640.h b/sound/soc/codecs/rt5640.h
index 3deb8babeabb..243f42633989 100644
--- a/sound/soc/codecs/rt5640.h
+++ b/sound/soc/codecs/rt5640.h
@@ -442,39 +442,39 @@
 #define RT5640_IF1_DAC_SEL_MASK			(0x3 << 14)
 #define RT5640_IF1_DAC_SEL_SFT			14
 #define RT5640_IF1_DAC_SEL_NOR			(0x0 << 14)
-#define RT5640_IF1_DAC_SEL_L2R			(0x1 << 14)
-#define RT5640_IF1_DAC_SEL_R2L			(0x2 << 14)
-#define RT5640_IF1_DAC_SEL_SWAP			(0x3 << 14)
+#define RT5640_IF1_DAC_SEL_SWAP			(0x1 << 14)
+#define RT5640_IF1_DAC_SEL_L2R			(0x2 << 14)
+#define RT5640_IF1_DAC_SEL_R2L			(0x3 << 14)
 #define RT5640_IF1_ADC_SEL_MASK			(0x3 << 12)
 #define RT5640_IF1_ADC_SEL_SFT			12
 #define RT5640_IF1_ADC_SEL_NOR			(0x0 << 12)
-#define RT5640_IF1_ADC_SEL_L2R			(0x1 << 12)
-#define RT5640_IF1_ADC_SEL_R2L			(0x2 << 12)
-#define RT5640_IF1_ADC_SEL_SWAP			(0x3 << 12)
+#define RT5640_IF1_ADC_SEL_SWAP			(0x1 << 12)
+#define RT5640_IF1_ADC_SEL_L2R			(0x2 << 12)
+#define RT5640_IF1_ADC_SEL_R2L			(0x3 << 12)
 #define RT5640_IF2_DAC_SEL_MASK			(0x3 << 10)
 #define RT5640_IF2_DAC_SEL_SFT			10
 #define RT5640_IF2_DAC_SEL_NOR			(0x0 << 10)
-#define RT5640_IF2_DAC_SEL_L2R			(0x1 << 10)
-#define RT5640_IF2_DAC_SEL_R2L			(0x2 << 10)
-#define RT5640_IF2_DAC_SEL_SWAP			(0x3 << 10)
+#define RT5640_IF2_DAC_SEL_SWAP			(0x1 << 10)
+#define RT5640_IF2_DAC_SEL_L2R			(0x2 << 10)
+#define RT5640_IF2_DAC_SEL_R2L			(0x3 << 10)
 #define RT5640_IF2_ADC_SEL_MASK			(0x3 << 8)
 #define RT5640_IF2_ADC_SEL_SFT			8
 #define RT5640_IF2_ADC_SEL_NOR			(0x0 << 8)
-#define RT5640_IF2_ADC_SEL_L2R			(0x1 << 8)
-#define RT5640_IF2_ADC_SEL_R2L			(0x2 << 8)
-#define RT5640_IF2_ADC_SEL_SWAP			(0x3 << 8)
+#define RT5640_IF2_ADC_SEL_SWAP			(0x1 << 8)
+#define RT5640_IF2_ADC_SEL_L2R			(0x2 << 8)
+#define RT5640_IF2_ADC_SEL_R2L			(0x3 << 8)
 #define RT5640_IF3_DAC_SEL_MASK			(0x3 << 6)
 #define RT5640_IF3_DAC_SEL_SFT			6
 #define RT5640_IF3_DAC_SEL_NOR			(0x0 << 6)
-#define RT5640_IF3_DAC_SEL_L2R			(0x1 << 6)
-#define RT5640_IF3_DAC_SEL_R2L			(0x2 << 6)
-#define RT5640_IF3_DAC_SEL_SWAP			(0x3 << 6)
+#define RT5640_IF3_DAC_SEL_SWAP			(0x1 << 6)
+#define RT5640_IF3_DAC_SEL_L2R			(0x2 << 6)
+#define RT5640_IF3_DAC_SEL_R2L			(0x3 << 6)
 #define RT5640_IF3_ADC_SEL_MASK			(0x3 << 4)
 #define RT5640_IF3_ADC_SEL_SFT			4
 #define RT5640_IF3_ADC_SEL_NOR			(0x0 << 4)
-#define RT5640_IF3_ADC_SEL_L2R			(0x1 << 4)
-#define RT5640_IF3_ADC_SEL_R2L			(0x2 << 4)
-#define RT5640_IF3_ADC_SEL_SWAP			(0x3 << 4)
+#define RT5640_IF3_ADC_SEL_SWAP			(0x1 << 4)
+#define RT5640_IF3_ADC_SEL_L2R			(0x2 << 4)
+#define RT5640_IF3_ADC_SEL_R2L			(0x3 << 4)
 
 /* REC Left Mixer Control 1 (0x3b) */
 #define RT5640_G_HP_L_RM_L_MASK			(0x7 << 13)

From b4ea6cf4883569a7c9c0297305033e9e678a03e4 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Thu, 3 Mar 2016 16:12:48 -0300
Subject: [PATCH 538/797] vb2-memops: Fix over allocation of frame vectors

commit 89a095668304e8a02502ffd35edacffdbf49aa8c upstream.

On page unaligned frames, create_framevec forces get_vaddr_frames to
allocate an extra page at the end of the buffer. Under some
circumstances, this leads to -EINVAL on VIDIOC_QBUF.

E.g:
We have a vm_area, vm_a, that goes from 0x1000 to 0x3000, and a
frame that goes from 0x1800 to 0x2800, i.e. 2 pages.

frame_vector_create will be called with the following params:

get_vaddr_frames(0x1800, 2, write, 1, vec);

get_vaddr will allocate the first page after checking that the memory
0x1800-0x27ff is valid, but it will not allocate the second page because
the range 0x2800-0x37ff is out of the vm_a range. This results in
create_framevec returning -EFAULT

Error Trace:
[ 9083.793015] video0: VIDIOC_QBUF: 00:00:00.00000000 index=1,
type=vid-cap, flags=0x00002002, field=any, sequence=0,
memory=userptr, bytesused=0, offset/userptr=0x7ff2b023ca80, length=5765760
[ 9083.793028] timecode=00:00:00 type=0, flags=0x00000000,
frames=0, userbits=0x00000000
[ 9083.793117] video0: VIDIOC_QBUF: error -22: 00:00:00.00000000
index=2, type=vid-cap, flags=0x00000000, field=any, sequence=0,
memory=userptr, bytesused=0, offset/userptr=0x7ff2b07bc500, length=5765760

Also use true instead of 1 since that argument is a bool in the
get_vaddr_frames() prototype.

Fixes: 21fb0cb7ec65 ("[media] vb2: Provide helpers for mapping virtual addresses")

Reported-by: Albert Antony <albert@newtec.dk>
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
[hans.verkuil@cisco.com: merged the 'bool' change into this patch]
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/v4l2-core/videobuf2-memops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c
index dbec5923fcf0..3c3b517f1d1c 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -49,7 +49,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
 	vec = frame_vector_create(nr);
 	if (!vec)
 		return ERR_PTR(-ENOMEM);
-	ret = get_vaddr_frames(start, nr, write, 1, vec);
+	ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec);
 	if (ret < 0)
 		goto out_destroy;
 	/* We accept only complete set of PFNs */

From a9da0b3dc72e074a2f84fad5f176750968a76bdb Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 22 Apr 2016 04:00:50 -0300
Subject: [PATCH 539/797] v4l2-dv-timings.h: fix polarity for 4k formats

commit 3020ca711871fdaf0c15c8bab677a6bc302e28fe upstream.

The VSync polarity was negative instead of positive for the 4k CEA formats.
I probably copy-and-pasted these from the DMT 4k format, which does have a
negative VSync polarity.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: Martin Bugge <marbugge@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/v4l2-dv-timings.h | 30 ++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index c039f1d68a09..086168e18ca8 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -183,7 +183,8 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -191,14 +192,16 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -206,14 +209,16 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -221,7 +226,8 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -229,14 +235,16 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -244,14 +252,16 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \

From b184522f688a31765a24081ed231e480e76edae6 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 22 Apr 2016 14:57:48 +1000
Subject: [PATCH 540/797] cxl: Keep IRQ mappings on context teardown

commit d6776bba44d9752f6cdf640046070e71ee4bba7b upstream.

Keep IRQ mappings on context teardown.  This won't leak IRQs as if we
allocate the mapping again, the generic code will give the same
mapping used last time.

Doing this works around a race in the generic code. Masking the
interrupt introduces a race which can crash the kernel or result in
an IRQ that is never EOIed. The loss of the EOI results in all subsequent
mappings to the same HW IRQ never receiving an interrupt.

We've seen this race with cxl test cases which are doing heavy context
startup and teardown at the same time as heavy interrupt load.

A fix to the generic code is being investigated also.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Tested-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/cxl/irq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 09a406058c46..efbb6945eb18 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -288,7 +288,6 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
 void cxl_unmap_irq(unsigned int virq, void *cookie)
 {
 	free_irq(virq, cookie);
-	irq_dispose_mapping(virq);
 }
 
 static int cxl_register_one_irq(struct cxl *adapter,

From 29ebbba744cf8951202b5f4ea62b4a297f4662c1 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 31 Mar 2016 19:03:25 +0300
Subject: [PATCH 541/797] IB/mlx5: Expose correct max_sge_rd limit

commit 986ef95ecdd3eb6fa29433e68faa94c7624083be upstream.

mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) have a limitation
where rdma read work queue entries cannot exceed 512 bytes.
A rdma_read wqe needs to fit in 512 bytes:
- wqe control segment (16 bytes)
- rdma segment (16 bytes)
- scatter elements (16 bytes each)

So max_sge_rd should be: (512 - 16 - 16) / 16 = 30.

Reported-by: Christoph Hellwig <hch@lst.de>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagig@grimberg.me>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx5/main.c |  2 +-
 include/linux/mlx5/device.h       | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c4e091528390..721d63f5b461 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -273,7 +273,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		     sizeof(struct mlx5_wqe_ctrl_seg)) /
 		     sizeof(struct mlx5_wqe_data_seg);
 	props->max_sge = min(max_rq_sg, max_sq_sg);
-	props->max_sge_rd = props->max_sge;
+	props->max_sge_rd	   = MLX5_MAX_SGE_RD;
 	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
 	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
 	props->max_mr		   = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0b473cbfa7ef..a91b67b18a73 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -334,6 +334,17 @@ enum {
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
 
+enum {
+	/*
+	 * Max wqe size for rdma read is 512 bytes, so this
+	 * limits our max_sge_rd as the wqe needs to fit:
+	 * - ctrl segment (16 bytes)
+	 * - rdma segment (16 bytes)
+	 * - scatter elements (16 bytes each)
+	 */
+	MLX5_MAX_SGE_RD	= (512 - 16 - 16) / 16
+};
+
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];

From c92003c18feb8159cbf64bc0afa7b048869fe3c6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Sun, 10 Apr 2016 19:13:13 -0600
Subject: [PATCH 542/797] IB/security: Restrict use of the write() interface

commit e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 upstream.

The drivers/infiniband stack uses write() as a replacement for
bi-directional ioctl().  This is not safe. There are ways to
trigger write calls that result in the return structure that
is normally written to user space being shunted off to user
specified kernel memory instead.

For the immediate repair, detect and deny suspicious accesses to
the write API.

For long term, update the user space libraries and the kernel API
to something that doesn't present the same security vulnerabilities
(likely a structured ioctl() interface).

The impacted uAPI interfaces are generally only available if
hardware from drivers/infiniband is installed in the system.

Reported-by: Jann Horn <jann@thejh.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
[ Expanded check to all known write() entry points ]
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/ucm.c            |  4 ++++
 drivers/infiniband/core/ucma.c           |  3 +++
 drivers/infiniband/core/uverbs_main.c    |  5 +++++
 drivers/infiniband/hw/qib/qib_file_ops.c |  5 +++++
 drivers/staging/rdma/hfi1/TODO           |  2 +-
 drivers/staging/rdma/hfi1/file_ops.c     |  6 ++++++
 include/rdma/ib.h                        | 16 ++++++++++++++++
 7 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 6b4e8a008bc0..564adf3116e8 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -48,6 +48,7 @@
 
 #include <asm/uaccess.h>
 
+#include <rdma/ib.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_user_cm.h>
 #include <rdma/ib_marshall.h>
@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
 	struct ib_ucm_cmd_hdr hdr;
 	ssize_t result;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+		return -EACCES;
+
 	if (len < sizeof(hdr))
 		return -EINVAL;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 8b5a934e1133..886f61ea6cc7 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
 	struct rdma_ucm_cmd_hdr hdr;
 	ssize_t ret;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+		return -EACCES;
+
 	if (len < sizeof(hdr))
 		return -EINVAL;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e3ef28861be6..24f3ca2c4ad7 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -48,6 +48,8 @@
 
 #include <asm/uaccess.h>
 
+#include <rdma/ib.h>
+
 #include "uverbs.h"
 
 MODULE_AUTHOR("Roland Dreier");
@@ -682,6 +684,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	int srcu_key;
 	ssize_t ret;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+		return -EACCES;
+
 	if (count < sizeof hdr)
 		return -EINVAL;
 
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index e449e394963f..24f4a782e0f4 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -45,6 +45,8 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 
+#include <rdma/ib.h>
+
 #include "qib.h"
 #include "qib_common.h"
 #include "qib_user_sdma.h"
@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
 	ssize_t ret = 0;
 	void *dest;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+		return -EACCES;
+
 	if (count < sizeof(cmd.type)) {
 		ret = -EINVAL;
 		goto bail;
diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO
index 05de0dad8762..4c6f1d7d2eaf 100644
--- a/drivers/staging/rdma/hfi1/TODO
+++ b/drivers/staging/rdma/hfi1/TODO
@@ -3,4 +3,4 @@ July, 2015
 - Remove unneeded file entries in sysfs
 - Remove software processing of IB protocol and place in library for use
   by qib, ipath (if still present), hfi1, and eventually soft-roce
-
+- Replace incorrect uAPI
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index aae9826ec62b..c851e51b1dc3 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -62,6 +62,8 @@
 #include <linux/cred.h>
 #include <linux/uio.h>
 
+#include <rdma/ib.h>
+
 #include "hfi.h"
 #include "pio.h"
 #include "device.h"
@@ -214,6 +216,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 	int uctxt_required = 1;
 	int must_be_root = 0;
 
+	/* FIXME: This interface cannot continue out of staging */
+	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+		return -EACCES;
+
 	if (count < sizeof(cmd)) {
 		ret = -EINVAL;
 		goto bail;
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index cf8f9e700e48..a6b93706b0fc 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -34,6 +34,7 @@
 #define _RDMA_IB_H
 
 #include <linux/types.h>
+#include <linux/sched.h>
 
 struct ib_addr {
 	union {
@@ -86,4 +87,19 @@ struct sockaddr_ib {
 	__u64			sib_scope_id;
 };
 
+/*
+ * The IB interfaces that use write() as bi-directional ioctl() are
+ * fundamentally unsafe, since there are lots of ways to trigger "write()"
+ * calls from various contexts with elevated privileges. That includes the
+ * traditional suid executable error message writes, but also various kernel
+ * interfaces that can write to file descriptors.
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static inline bool ib_safe_file_access(struct file *filp)
+{
+	return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
+}
+
 #endif /* _RDMA_IB_H */

From 513f5c33b5208dbd090f56c843aead053cb3d7a3 Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Thu, 21 Apr 2016 18:21:11 +0200
Subject: [PATCH 543/797] efi: Fix out-of-bounds read in variable_matches()

commit 630ba0cc7a6dbafbdee43795617c872b35cde1b4 upstream.

The variable_matches() function can currently read "var_name[len]", for
example when:

 - var_name[0] == 'a',
 - len == 1
 - match_name points to the NUL-terminated string "ab".

This function is supposed to accept "var_name" inputs that are not
NUL-terminated (hence the "len" parameter). Document the function, and
access "var_name[*match]" only if "*match" is smaller than "len".

Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Cc: Peter Jones <pjones@redhat.com>
Cc: Matthew Garrett <mjg59@coreos.com>
Cc: Jason Andryuk <jandryuk@gmail.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Link: http://thread.gmane.org/gmane.comp.freedesktop.xorg.drivers.intel/86906
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/vars.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 7f2ea21c730d..6f182fd91a6d 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -202,29 +202,44 @@ static const struct variable_validate variable_validate[] = {
 	{ NULL_GUID, "", NULL },
 };
 
+/*
+ * Check if @var_name matches the pattern given in @match_name.
+ *
+ * @var_name: an array of @len non-NUL characters.
+ * @match_name: a NUL-terminated pattern string, optionally ending in "*". A
+ *              final "*" character matches any trailing characters @var_name,
+ *              including the case when there are none left in @var_name.
+ * @match: on output, the number of non-wildcard characters in @match_name
+ *         that @var_name matches, regardless of the return value.
+ * @return: whether @var_name fully matches @match_name.
+ */
 static bool
 variable_matches(const char *var_name, size_t len, const char *match_name,
 		 int *match)
 {
 	for (*match = 0; ; (*match)++) {
 		char c = match_name[*match];
-		char u = var_name[*match];
 
-		/* Wildcard in the matching name means we've matched */
-		if (c == '*')
+		switch (c) {
+		case '*':
+			/* Wildcard in @match_name means we've matched. */
 			return true;
 
-		/* Case sensitive match */
-		if (!c && *match == len)
-			return true;
+		case '\0':
+			/* @match_name has ended. Has @var_name too? */
+			return (*match == len);
 
-		if (c != u)
+		default:
+			/*
+			 * We've reached a non-wildcard char in @match_name.
+			 * Continue only if there's an identical character in
+			 * @var_name.
+			 */
+			if (*match < len && c == var_name[*match])
+				continue;
 			return false;
-
-		if (!c)
-			return true;
+		}
 	}
-	return true;
 }
 
 bool

From b8f80ba7e09ca1945946d4a6d7391c0795ff99f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 1 Feb 2016 22:06:55 +0000
Subject: [PATCH 544/797] efi: Expose non-blocking set_variable() wrapper to
 efivars

commit 9c6672ac9c91f7eb1ec436be1442b8c26d098e55 upstream.

Commit 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable()
operation") implemented a non-blocking alternative for the UEFI
SetVariable() invocation performed by efivars, since it may
occur in atomic context. However, this version of the function
was never exposed via the efivars struct, so the non-blocking
version was not actually callable. Fix that.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation")
Link: http://lkml.kernel.org/r/1454364428-494-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/efi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 027ca212179f..3b52677f459a 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -180,6 +180,7 @@ static int generic_ops_register(void)
 {
 	generic_ops.get_variable = efi.get_variable;
 	generic_ops.set_variable = efi.set_variable;
+	generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking;
 	generic_ops.get_next_variable = efi.get_next_variable;
 	generic_ops.query_variable_store = efi_query_variable_store;
 

From 01d5ccd341290e771ac6b94b08c220df6f81a630 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 27 Apr 2016 14:22:32 -0600
Subject: [PATCH 545/797] x86/apic: Handle zero vector gracefully in
 clear_irq_vector()

commit 1bdb8970392a68489b469c3a330a1adb5ef61beb upstream.

If x86_vector_alloc_irq() fails x86_vector_free_irqs() is invoked to cleanup
the already allocated vectors. This subsequently calls clear_irq_vector().

The failed irq has no vector assigned, which triggers the BUG_ON(!vector) in
clear_irq_vector().

We cannot suppress the call to x86_vector_free_irqs() for the failed
interrupt, because the other data related to this irq must be cleaned up as
well. So calling clear_irq_vector() with vector == 0 is legitimate.

Remove the BUG_ON and return if vector is zero.

[ tglx: Massaged changelog ]

Fixes: b5dc8e6c21e7 "x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors"
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/vector.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 7af2505f20c2..df6b4eeac0bd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -254,7 +254,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	struct irq_desc *desc;
 	int cpu, vector;
 
-	BUG_ON(!data->cfg.vector);
+	if (!data->cfg.vector)
+		return;
 
 	vector = data->cfg.vector;
 	for_each_cpu_and(cpu, data->domain, cpu_online_mask)

From 2da9606aea5a8fd1b710f8c8dd5295da4825e9cd Mon Sep 17 00:00:00 2001
From: Roman Pen <roman.penyaev@profitbricks.com>
Date: Tue, 26 Apr 2016 13:15:35 +0200
Subject: [PATCH 546/797] workqueue: fix ghost PENDING flag while doing MQ IO

commit 346c09f80459a3ad97df1816d6d606169a51001a upstream.

The bug in a workqueue leads to a stalled IO request in MQ ctx->rq_list
with the following backtrace:

[  601.347452] INFO: task kworker/u129:5:1636 blocked for more than 120 seconds.
[  601.347574]       Tainted: G           O    4.4.5-1-storage+ #6
[  601.347651] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  601.348142] kworker/u129:5  D ffff880803077988     0  1636      2 0x00000000
[  601.348519] Workqueue: ibnbd_server_fileio_wq ibnbd_dev_file_submit_io_worker [ibnbd_server]
[  601.348999]  ffff880803077988 ffff88080466b900 ffff8808033f9c80 ffff880803078000
[  601.349662]  ffff880807c95000 7fffffffffffffff ffffffff815b0920 ffff880803077ad0
[  601.350333]  ffff8808030779a0 ffffffff815b01d5 0000000000000000 ffff880803077a38
[  601.350965] Call Trace:
[  601.351203]  [<ffffffff815b0920>] ? bit_wait+0x60/0x60
[  601.351444]  [<ffffffff815b01d5>] schedule+0x35/0x80
[  601.351709]  [<ffffffff815b2dd2>] schedule_timeout+0x192/0x230
[  601.351958]  [<ffffffff812d43f7>] ? blk_flush_plug_list+0xc7/0x220
[  601.352208]  [<ffffffff810bd737>] ? ktime_get+0x37/0xa0
[  601.352446]  [<ffffffff815b0920>] ? bit_wait+0x60/0x60
[  601.352688]  [<ffffffff815af784>] io_schedule_timeout+0xa4/0x110
[  601.352951]  [<ffffffff815b3a4e>] ? _raw_spin_unlock_irqrestore+0xe/0x10
[  601.353196]  [<ffffffff815b093b>] bit_wait_io+0x1b/0x70
[  601.353440]  [<ffffffff815b056d>] __wait_on_bit+0x5d/0x90
[  601.353689]  [<ffffffff81127bd0>] wait_on_page_bit+0xc0/0xd0
[  601.353958]  [<ffffffff81096db0>] ? autoremove_wake_function+0x40/0x40
[  601.354200]  [<ffffffff81127cc4>] __filemap_fdatawait_range+0xe4/0x140
[  601.354441]  [<ffffffff81127d34>] filemap_fdatawait_range+0x14/0x30
[  601.354688]  [<ffffffff81129a9f>] filemap_write_and_wait_range+0x3f/0x70
[  601.354932]  [<ffffffff811ced3b>] blkdev_fsync+0x1b/0x50
[  601.355193]  [<ffffffff811c82d9>] vfs_fsync_range+0x49/0xa0
[  601.355432]  [<ffffffff811cf45a>] blkdev_write_iter+0xca/0x100
[  601.355679]  [<ffffffff81197b1a>] __vfs_write+0xaa/0xe0
[  601.355925]  [<ffffffff81198379>] vfs_write+0xa9/0x1a0
[  601.356164]  [<ffffffff811c59d8>] kernel_write+0x38/0x50

The underlying device is a null_blk, with default parameters:

  queue_mode    = MQ
  submit_queues = 1

Verification that nullb0 has something inflight:

root@pserver8:~# cat /sys/block/nullb0/inflight
       0        1
root@pserver8:~# find /sys/block/nullb0/mq/0/cpu* -name rq_list -print -exec cat {} \;
...
/sys/block/nullb0/mq/0/cpu2/rq_list
CTX pending:
        ffff8838038e2400
...

During debug it became clear that stalled request is always inserted in
the rq_list from the following path:

   save_stack_trace_tsk + 34
   blk_mq_insert_requests + 231
   blk_mq_flush_plug_list + 281
   blk_flush_plug_list + 199
   wait_on_page_bit + 192
   __filemap_fdatawait_range + 228
   filemap_fdatawait_range + 20
   filemap_write_and_wait_range + 63
   blkdev_fsync + 27
   vfs_fsync_range + 73
   blkdev_write_iter + 202
   __vfs_write + 170
   vfs_write + 169
   kernel_write + 56

So blk_flush_plug_list() was called with from_schedule == true.

If from_schedule is true, that means that finally blk_mq_insert_requests()
offloads execution of __blk_mq_run_hw_queue() and uses kblockd workqueue,
i.e. it calls kblockd_schedule_delayed_work_on().

That means, that we race with another CPU, which is about to execute
__blk_mq_run_hw_queue() work.

Further debugging shows the following traces from different CPUs:

  CPU#0                                  CPU#1
  ----------------------------------     -------------------------------
  request A inserted
  STORE hctx->ctx_map[0] bit marked
  kblockd_schedule...() returns 1
  <schedule to kblockd workqueue>
                                         request B inserted
                                         STORE hctx->ctx_map[1] bit marked
                                         kblockd_schedule...() returns 0
  *** WORK PENDING bit is cleared ***
  flush_busy_ctxs() is executed, but
  bit 1, set by CPU#1, is not observed

As a result, request B stays pending forever.

This behaviour can be explained by speculative LOAD of hctx->ctx_map on
CPU#0, which is reordered with clear of PENDING bit and executed _before_
actual STORE of bit 1 on CPU#1.

The proper fix is an explicit full barrier <mfence>, which guarantees
that clear of PENDING bit is to be executed before all possible
speculative LOADS or STORES inside actual work function.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Gioh Kim <gi-oh.kim@profitbricks.com>
Cc: Michael Wang <yun.wang@profitbricks.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/workqueue.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 450c21fd0e6e..0ec05948a97b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -649,6 +649,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
 	 */
 	smp_wmb();
 	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+	/*
+	 * The following mb guarantees that previous clear of a PENDING bit
+	 * will not be reordered with any speculative LOADS or STORES from
+	 * work->current_func, which is executed afterwards.  This possible
+	 * reordering can lead to a missed execution on attempt to queue
+	 * the same @work.  E.g. consider this case:
+	 *
+	 *   CPU#0                         CPU#1
+	 *   ----------------------------  --------------------------------
+	 *
+	 * 1  STORE event_indicated
+	 * 2  queue_work_on() {
+	 * 3    test_and_set_bit(PENDING)
+	 * 4 }                             set_..._and_clear_pending() {
+	 * 5                                 set_work_data() # clear bit
+	 * 6                                 smp_mb()
+	 * 7                               work->current_func() {
+	 * 8				      LOAD event_indicated
+	 *				   }
+	 *
+	 * Without an explicit full barrier speculative LOAD on line 8 can
+	 * be executed before CPU#0 does STORE on line 1.  If that happens,
+	 * CPU#0 observes the PENDING bit is still set and a new execution of
+	 * @work is not queued, in the hope that CPU#1 will eventually
+	 * finish the queued @work.  Meanwhile CPU#1 does not see
+	 * event_indicated is set, because speculative LOAD was executed
+	 * before actual STORE.
+	 */
+	smp_mb();
 }
 
 static void clear_work_data(struct work_struct *work)

From a4e25ff31103e7c9084904418cb95596e3e9d9cf Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 15 Mar 2016 14:53:32 -0700
Subject: [PATCH 547/797] slub: clean up code for kmem cgroup support to
 kmem_cache_free_bulk

commit 376bf125ac781d32e202760ed7deb1ae4ed35d31 upstream.

This change is primarily an attempt to make it easier to realize the
optimizations the compiler performs in case CONFIG_MEMCG_KMEM is not
enabled.

Performance wise, even when CONFIG_MEMCG_KMEM is compiled in, the
overhead is zero.  This is because, as long as no process has enabled
kmem cgroups accounting, the assignment is replaced by asm-NOP
operations.  This is possible because memcg_kmem_enabled() uses a
static_key_false() construct.

It also helps readability as it avoids accessing the p[] array like:
p[size - 1], which "exposes" that the array is processed backwards inside
the helper function build_detached_freelist().

Lastly, this also makes the code more robust in error cases such as passing
NULL pointers in the array, which were handled before commit
033745189b1b ("slub: add missing kmem cgroup support to
kmem_cache_free_bulk").

Fixes: 033745189b1b ("slub: add missing kmem cgroup support to kmem_cache_free_bulk")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slub.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 46997517406e..65d5f92d51d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2819,6 +2819,7 @@ struct detached_freelist {
 	void *tail;
 	void *freelist;
 	int cnt;
+	struct kmem_cache *s;
 };
 
 /*
@@ -2833,8 +2834,9 @@ struct detached_freelist {
  * synchronization primitive.  Look ahead in the array is limited due
  * to performance reasons.
  */
-static int build_detached_freelist(struct kmem_cache *s, size_t size,
-				   void **p, struct detached_freelist *df)
+static inline
+int build_detached_freelist(struct kmem_cache *s, size_t size,
+			    void **p, struct detached_freelist *df)
 {
 	size_t first_skipped_index = 0;
 	int lookahead = 3;
@@ -2850,8 +2852,11 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
 	if (!object)
 		return 0;
 
+	/* Support for memcg, compiler can optimize this out */
+	df->s = cache_from_obj(s, object);
+
 	/* Start new detached freelist */
-	set_freepointer(s, object, NULL);
+	set_freepointer(df->s, object, NULL);
 	df->page = virt_to_head_page(object);
 	df->tail = object;
 	df->freelist = object;
@@ -2866,7 +2871,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
 		/* df->page is always set at this point */
 		if (df->page == virt_to_head_page(object)) {
 			/* Opportunity build freelist */
-			set_freepointer(s, object, df->freelist);
+			set_freepointer(df->s, object, df->freelist);
 			df->freelist = object;
 			df->cnt++;
 			p[size] = NULL; /* mark object processed */
@@ -2885,25 +2890,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
 	return first_skipped_index;
 }
 
-
 /* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 {
 	if (WARN_ON(!size))
 		return;
 
 	do {
 		struct detached_freelist df;
-		struct kmem_cache *s;
-
-		/* Support for memcg */
-		s = cache_from_obj(orig_s, p[size - 1]);
 
 		size = build_detached_freelist(s, size, p, &df);
 		if (unlikely(!df.page))
 			continue;
 
-		slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+		slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
 	} while (likely(size));
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);

From d52097476caeb14f4d7e3417dda08220d2813cc4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Apr 2016 19:06:48 -0400
Subject: [PATCH 548/797] cgroup, cpuset: replace cpuset_post_attach_flush()
 with cgroup_subsys->post_attach callback

commit 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 upstream.

Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset
kicks off asynchronous NUMA node migration if necessary during task
migration and flushes it from cpuset_post_attach_flush() which is
called at the end of __cgroup_procs_write().  This is to avoid
performing migration with cgroup_threadgroup_rwsem write-locked which
can lead to deadlock through dependency on kworker creation.

memcg has a similar issue with charge moving, so let's convert it to
an official callback rather than the current one-off cpuset specific
function.  This patch adds cgroup_subsys->post_attach callback and
makes cpuset register cpuset_post_attach_flush() as its ->post_attach.

The conversion is mostly one-to-one except that the new callback is
called under cgroup_mutex.  This is to guarantee that no other
migration operations are started before ->post_attach callbacks are
finished.  cgroup_mutex is one of the outermost mutexes in the system
and has never been and shouldn't be a problem.  We can add specialized
synchronization around __cgroup_procs_write() but I don't think
there's any noticeable benefit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cgroup-defs.h | 1 +
 include/linux/cpuset.h      | 6 ------
 kernel/cgroup.c             | 7 +++++--
 kernel/cpuset.c             | 4 ++--
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index a7c7f74808a4..8da263299754 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -434,6 +434,7 @@ struct cgroup_subsys {
 	int (*can_attach)(struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup_taskset *tset);
+	void (*post_attach)(void);
 	int (*can_fork)(struct task_struct *task, void **priv_p);
 	void (*cancel_fork)(struct task_struct *task, void *priv);
 	void (*fork)(struct task_struct *task, void *priv);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index fea160ee5803..85a868ccb493 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 	task_unlock(current);
 }
 
-extern void cpuset_post_attach_flush(void);
-
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
 	return false;
 }
 
-static inline void cpuset_post_attach_flush(void)
-{
-}
-
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dc94f8beb097..b0ea3aebc05a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2721,9 +2721,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 				    size_t nbytes, loff_t off, bool threadgroup)
 {
 	struct task_struct *tsk;
+	struct cgroup_subsys *ss;
 	struct cgroup *cgrp;
 	pid_t pid;
-	int ret;
+	int ssid, ret;
 
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;
@@ -2771,8 +2772,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	rcu_read_unlock();
 out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
+	for_each_subsys(ss, ssid)
+		if (ss->post_attach)
+			ss->post_attach();
 	cgroup_kn_unlock(of->kn);
-	cpuset_post_attach_flush();
 	return ret ?: nbytes;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2ade632197d5..11eaf14b52c2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -57,7 +57,6 @@
 #include <asm/uaccess.h>
 #include <linux/atomic.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
@@ -1015,7 +1014,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	}
 }
 
-void cpuset_post_attach_flush(void)
+static void cpuset_post_attach(void)
 {
 	flush_workqueue(cpuset_migrate_mm_wq);
 }
@@ -2083,6 +2082,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.can_attach	= cpuset_can_attach,
 	.cancel_attach	= cpuset_cancel_attach,
 	.attach		= cpuset_attach,
+	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
 	.legacy_cftypes	= files,
 	.early_init	= 1,

From 52526076a5a686906a0acc22d27530ecb9364d84 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Apr 2016 19:09:02 -0400
Subject: [PATCH 549/797] memcg: relocate charge moving from ->attach to
 ->post_attach

commit 264a0ae164bc0e9144bebcd25ff030d067b1a878 upstream.

Hello,

So, this ended up a lot simpler than I originally expected.  I tested
it lightly and it seems to work fine.  Petr, can you please test these
two patches w/o the lru drain drop patch and see whether the problem
is gone?

Thanks.
------ 8< ------
If charge moving is used, memcg performs relabeling of the affected
pages from its ->attach callback which is called under
cgroup_threadgroup_rwsem and thus can't create new kthreads.  This is
fragile as various operations may depend on workqueues making forward
progress which relies on the ability to create new kthreads.

There's no reason to perform charge moving from ->attach which is deep
in the task migration path.  Move it to ->post_attach which is called
after the actual migration is finished and cgroup_threadgroup_rwsem is
dropped.

* move_charge_struct->mm is added and ->can_attach is now responsible
  for pinning and recording the target mm.  mem_cgroup_clear_mc() is
  updated accordingly.  This also simplifies mem_cgroup_move_task().

* mem_cgroup_move_task() is now called from ->post_attach instead of
  ->attach.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Debugged-and-tested-by: Petr Mladek <pmladek@suse.com>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 1ed1328792ff ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc0bcc41d57f..6ba4dd988e2e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
 	spinlock_t	  lock; /* for from, to */
+	struct mm_struct  *mm;
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long flags;
@@ -4800,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void)
 
 static void mem_cgroup_clear_mc(void)
 {
+	struct mm_struct *mm = mc.mm;
+
 	/*
 	 * we must clear moving_task before waking up waiters at the end of
 	 * task migration.
@@ -4809,7 +4812,10 @@ static void mem_cgroup_clear_mc(void)
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
+	mc.mm = NULL;
 	spin_unlock(&mc.lock);
+
+	mmput(mm);
 }
 
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4866,6 +4872,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		VM_BUG_ON(mc.moved_swap);
 
 		spin_lock(&mc.lock);
+		mc.mm = mm;
 		mc.from = from;
 		mc.to = memcg;
 		mc.flags = move_flags;
@@ -4875,8 +4882,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		ret = mem_cgroup_precharge_mc(mm);
 		if (ret)
 			mem_cgroup_clear_mc();
+	} else {
+		mmput(mm);
 	}
-	mmput(mm);
 	return ret;
 }
 
@@ -4985,11 +4993,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	return ret;
 }
 
-static void mem_cgroup_move_charge(struct mm_struct *mm)
+static void mem_cgroup_move_charge(void)
 {
 	struct mm_walk mem_cgroup_move_charge_walk = {
 		.pmd_entry = mem_cgroup_move_charge_pte_range,
-		.mm = mm,
+		.mm = mc.mm,
 	};
 
 	lru_add_drain_all();
@@ -5001,7 +5009,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	atomic_inc(&mc.from->moving_account);
 	synchronize_rcu();
 retry:
-	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+	if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
 		/*
 		 * Someone who are holding the mmap_sem might be waiting in
 		 * waitq. So we cancel all extra charges, wake up all waiters,
@@ -5018,23 +5026,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	 * additional charge, the page walk just aborts.
 	 */
 	walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
-	up_read(&mm->mmap_sem);
+	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
-	struct cgroup_subsys_state *css;
-	struct task_struct *p = cgroup_taskset_first(tset, &css);
-	struct mm_struct *mm = get_task_mm(p);
-
-	if (mm) {
-		if (mc.to)
-			mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
-	if (mc.to)
+	if (mc.to) {
+		mem_cgroup_move_charge();
 		mem_cgroup_clear_mc();
+	}
 }
 #else	/* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -5044,7 +5045,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
 }
 #endif
@@ -5258,7 +5259,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_reset = mem_cgroup_css_reset,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
-	.attach = mem_cgroup_move_task,
+	.post_attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
 	.dfl_cftypes = memory_files,
 	.legacy_cftypes = mem_cgroup_legacy_files,

From be591a683e3b4cc58466e08cd6b5e4a71c02b19a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Thu, 28 Apr 2016 16:18:32 -0700
Subject: [PATCH 550/797] mm/huge_memory: replace VM_NO_THP VM_BUG_ON with
 actual VMA check

commit 3486b85a29c1741db99d0c522211c82d2b7a56d0 upstream.

Khugepaged detects own VMAs by checking vm_file and vm_ops but this way
it cannot distinguish private /dev/zero mappings from other special
mappings like /dev/hpet which has no vm_ops and populates PTEs in mmap.

This fixes false-positive VM_BUG_ON and prevents installing THP where
they are not expected.

Link: http://lkml.kernel.org/r/CACT4Y+ZmuZMV5CjSFOeXviwQdABAgT7T+StKfTqan9YDtgEi5g@mail.gmail.com
Fixes: 78f11a255749 ("mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups")
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 62fe06bb7d04..530e6427f823 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2134,10 +2134,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_ops)
+	if (vma->vm_ops || (vm_flags & VM_NO_THP))
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -2498,8 +2497,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
-	return true;
+	return !(vma->vm_flags & VM_NO_THP);
 }
 
 static void collapse_huge_page(struct mm_struct *mm,

From e513b90a9aef91e6399decb8e9592f2d75f7ebad Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Thu, 28 Apr 2016 16:18:35 -0700
Subject: [PATCH 551/797] numa: fix /proc/<pid>/numa_maps for THP

commit 28093f9f34cedeaea0f481c58446d9dac6dd620f upstream.

In gather_pte_stats() a THP pmd is cast into a pte, which is wrong
because the layouts may differ depending on the architecture.  On s390
this will lead to inaccurate numa_maps accounting in /proc because of
misguided pte_present() and pte_dirty() checks on the fake pte.

On other architectures pte_present() and pte_dirty() may work by chance,
but there may be an issue with direct-access (dax) mappings w/o
underlying struct pages when HAVE_PTE_SPECIAL is set and THP is
available.  In vm_normal_page() the fake pte will be checked with
pte_special() and because there is no "special" bit in a pmd, this will
always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be
skipped.  On dax mappings w/o struct pages, an invalid struct page
pointer would then be returned that can crash the kernel.

This patch fixes the numa_maps THP handling by introducing new "_pmd"
variants of the can_gather_numa_stats() and vm_normal_page() functions.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/task_mmu.c | 33 ++++++++++++++++++++++++++++++---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 09cd3edde08a..f6478301db00 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1435,6 +1435,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
 	return page;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
+					      struct vm_area_struct *vma,
+					      unsigned long addr)
+{
+	struct page *page;
+	int nid;
+
+	if (!pmd_present(pmd))
+		return NULL;
+
+	page = vm_normal_page_pmd(vma, addr, pmd);
+	if (!page)
+		return NULL;
+
+	if (PageReserved(page))
+		return NULL;
+
+	nid = page_to_nid(page);
+	if (!node_isset(nid, node_states[N_MEMORY]))
+		return NULL;
+
+	return page;
+}
+#endif
+
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
 {
@@ -1444,13 +1470,13 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *orig_pte;
 	pte_t *pte;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
-		page = can_gather_numa_stats(huge_pte, vma, addr);
+		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
 		if (page)
-			gather_stats(page, md, pte_dirty(huge_pte),
+			gather_stats(page, md, pmd_dirty(*pmd),
 				     HPAGE_PMD_SIZE/PAGE_SIZE);
 		spin_unlock(ptl);
 		return 0;
@@ -1458,6 +1484,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
+#endif
 	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	do {
 		struct page *page = can_gather_numa_stats(*pte, vma, addr);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..fb8b20e5d021 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1084,6 +1084,8 @@ struct zap_details {
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		pte_t pte);
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd);
 
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
diff --git a/mm/memory.c b/mm/memory.c
index b80bf4746b67..76dcee317714 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -797,6 +797,46 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 	return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd)
+{
+	unsigned long pfn = pmd_pfn(pmd);
+
+	/*
+	 * There is no pmd_special() but there may be special pmds, e.g.
+	 * in a direct-access (dax) mapping, so let's just replicate the
+	 * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+	 */
+	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+		if (vma->vm_flags & VM_MIXEDMAP) {
+			if (!pfn_valid(pfn))
+				return NULL;
+			goto out;
+		} else {
+			unsigned long off;
+			off = (addr - vma->vm_start) >> PAGE_SHIFT;
+			if (pfn == vma->vm_pgoff + off)
+				return NULL;
+			if (!is_cow_mapping(vma->vm_flags))
+				return NULL;
+		}
+	}
+
+	if (is_zero_pfn(pfn))
+		return NULL;
+	if (unlikely(pfn > highest_memmap_pfn))
+		return NULL;
+
+	/*
+	 * NOTE! We still have PageReserved() pages in the page tables.
+	 * eg. VDSO mappings can cause them to exist.
+	 */
+out:
+	return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range

From 87c855f150be9317b9b6ad82c1611ed8d577d986 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 28 Apr 2016 16:18:38 -0700
Subject: [PATCH 552/797] mm: vmscan: reclaim highmem zone if buffer_heads is
 over limit

commit 7bf52fb891b64b8d61caf0b82060adb9db761aec upstream.

We used to reclaim the highmem zone if buffer_heads is over the limit, but
commit 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from
shrink_zone()") changed the behavior so it doesn't reclaim highmem zone
although buffer_heads is over the limit.  This patch restores the logic.

Fixes: 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from shrink_zone()")
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmscan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2aec4241b42a..0c114e2b01d3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2534,7 +2534,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 		sc->gfp_mask |= __GFP_HIGHMEM;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-					requested_highidx, sc->nodemask) {
+					gfp_zone(sc->gfp_mask), sc->nodemask) {
 		enum zone_type classzone_idx;
 
 		if (!populated_zone(zone))

From 36abe7272a248a7e47a4cec8d8ec9c76ef387bac Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 28 Apr 2016 16:18:44 -0700
Subject: [PATCH 553/797] mm/hwpoison: fix wrong num_poisoned_pages accounting

commit d7e69488bd04de165667f6bc741c1c0ec6042ab9 upstream.

Currently, migration code increases num_poisoned_pages on *failed*
migration page as well as successfully migrated one at the trial of
memory-failure.  It will make the stat wrong.  As well, it marks the
page as PG_HWPoison even if the migration trial failed.  It would mean
we cannot recover the corrupted page using memory-failure facility.

This patch fixes it.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/migrate.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 6d17e0ab42d4..bbeb0b71fcf4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -963,7 +963,13 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
 		/* Soft-offlined page shouldn't go through lru cache list */
-		if (reason == MR_MEMORY_FAILURE) {
+		if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+			/*
+			 * With this release, we free successfully migrated
+			 * page and set PG_HWPoison on just freed page
+			 * intentionally. Although it's rather weird, it's how
+			 * HWPoison flag works at the moment.
+			 */
 			put_page(page);
 			if (!test_set_page_hwpoison(page))
 				num_poisoned_pages_inc();

From 3c6266d57c4c4fa02588070347acf21b610bbd96 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Jan 2016 15:32:15 -0500
Subject: [PATCH 554/797] cgroup: make sure a parent css isn't freed before its
 children

commit 8bb5ef79bc0f4016ecf79e8dce6096a3c63603e4 upstream.

There are three subsystem callbacks in css shutdown path -
css_offline(), css_released() and css_free().  Except for
css_released(), cgroup core didn't guarantee the order of invocation.
css_offline() or css_free() could be called on a parent css before its
children.  This behavior is unexpected and led to bugs in cpu and
memory controller.

The previous patch updated ordering for css_offline() which fixes the
cpu controller issue.  While there currently isn't a known bug caused
by misordering of css_free() invocations, let's fix it too for
consistency.

css_free() ordering can be trivially fixed by moving putting of the
parent css below css_free() invocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cgroup.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b0ea3aebc05a..1c9d701f7a72 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4692,14 +4692,15 @@ static void css_free_work_fn(struct work_struct *work)
 
 	if (ss) {
 		/* css free path */
+		struct cgroup_subsys_state *parent = css->parent;
 		int id = css->id;
 
-		if (css->parent)
-			css_put(css->parent);
-
 		ss->css_free(css);
 		cgroup_idr_remove(&ss->css_idr, id);
 		cgroup_put(cgrp);
+
+		if (parent)
+			css_put(parent);
 	} else {
 		/* cgroup free path */
 		atomic_dec(&cgrp->root->nr_cgrps);

From 4a1bb501e4b65908b102f0b371b0621ff18ad5c3 Mon Sep 17 00:00:00 2001
From: Ignat Korchagin <ignat.korchagin@gmail.com>
Date: Thu, 17 Mar 2016 18:00:29 +0000
Subject: [PATCH 555/797] USB: usbip: fix potential out-of-bounds write

commit b348d7dddb6c4fbfc810b7a0626e8ec9e29f7cbb upstream.

Fix potential out-of-bounds write to urb->transfer_buffer
usbip handles network communication directly in the kernel. When receiving a
packet from its peer, usbip code parses headers according to protocol. As
part of this parsing urb->actual_length is filled. Since the input for
urb->actual_length comes from the network, it should be treated as untrusted.
Any entity controlling the network may put any value in the input and the
preallocated urb->transfer_buffer may not be large enough to hold the data.
Thus, the malicious entity is able to write arbitrary data to kernel memory.

Signed-off-by: Ignat Korchagin <ignat.korchagin@gmail.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/usbip_common.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index facaaf003f19..e40da7759a0e 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -741,6 +741,17 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb)
 	if (!(size > 0))
 		return 0;
 
+	if (size > urb->transfer_buffer_length) {
+		/* should not happen, probably malicious packet */
+		if (ud->side == USBIP_STUB) {
+			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+			return 0;
+		} else {
+			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+			return -EPIPE;
+		}
+	}
+
 	ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size);
 	if (ret != size) {
 		dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret);

From 3a4b3d187dba0255cbbb749f64c3b71f8105f44f Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Sun, 3 Apr 2016 16:15:00 -0300
Subject: [PATCH 556/797] videobuf2-core: Check user space planes array in
 dqbuf

commit e7e0c3e26587749b62d17b9dd0532874186c77f7 upstream.

The number of planes in videobuf2 is specific to a buffer. In order to
verify that the planes array provided by the user is long enough, a new
vb2_buf_op is required.

Call __verify_planes_array() when the dequeued buffer is known. Return an
error to the caller if there was one, otherwise remove the buffer from the
done list.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-core.c | 10 +++++-----
 include/media/videobuf2-core.h           |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 33bdd81065e8..11f39791ec33 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1502,7 +1502,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking)
  * Will sleep if required for nonblocking == false.
  */
 static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb,
-				int nonblocking)
+			     void *pb, int nonblocking)
 {
 	unsigned long flags;
 	int ret;
@@ -1523,10 +1523,10 @@ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb,
 	/*
 	 * Only remove the buffer from done_list if v4l2_buffer can handle all
 	 * the planes.
-	 * Verifying planes is NOT necessary since it already has been checked
-	 * before the buffer is queued/prepared. So it can never fail.
 	 */
-	list_del(&(*vb)->done_entry);
+	ret = call_bufop(q, verify_planes_array, *vb, pb);
+	if (!ret)
+		list_del(&(*vb)->done_entry);
 	spin_unlock_irqrestore(&q->done_lock, flags);
 
 	return ret;
@@ -1604,7 +1604,7 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
 	struct vb2_buffer *vb = NULL;
 	int ret;
 
-	ret = __vb2_get_done_vb(q, &vb, nonblocking);
+	ret = __vb2_get_done_vb(q, &vb, pb, nonblocking);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 647ebfe5174f..d4227a8a2a23 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -363,6 +363,7 @@ struct vb2_ops {
 };
 
 struct vb2_buf_ops {
+	int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb);
 	int (*fill_user_buffer)(struct vb2_buffer *vb, void *pb);
 	int (*fill_vb2_buffer)(struct vb2_buffer *vb, const void *pb,
 				struct vb2_plane *planes);

From 19a4e46b4513bab7d6b368175be2e24ad4665e5a Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Sun, 3 Apr 2016 16:31:03 -0300
Subject: [PATCH 557/797] videobuf2-v4l2: Verify planes array in buffer
 dequeueing

commit 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab upstream.

When a buffer is being dequeued using VIDIOC_DQBUF IOCTL, the exact buffer
which will be dequeued is not known until the buffer has been removed from
the queue. The number of planes is specific to a buffer, not to the queue.

This does lead to the situation where multi-plane buffers may be requested
and queued with n planes, but VIDIOC_DQBUF IOCTL may be passed an argument
struct with fewer planes.

__fill_v4l2_buffer() however uses the number of planes from the dequeued
videobuf2 buffer, overwriting kernel memory (the m.planes array allocated
in video_usercopy() in v4l2-ioctl.c)  if the user provided fewer
planes than the dequeued buffer had. Oops!

Fixes: b0e0e1f83de3 ("[media] media: videobuf2: Prepare to divide videobuf2")

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-v4l2.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
index 502984c724ff..6c441be8f893 100644
--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
+++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
@@ -67,6 +67,11 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer
 	return 0;
 }
 
+static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb)
+{
+	return __verify_planes_array(vb, pb);
+}
+
 /**
  * __verify_length() - Verify that the bytesused value for each plane fits in
  * the plane length and that the data offset doesn't exceed the bytesused value.
@@ -432,6 +437,7 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb,
 }
 
 static const struct vb2_buf_ops v4l2_buf_ops = {
+	.verify_planes_array	= __verify_planes_array_core,
 	.fill_user_buffer	= __fill_v4l2_buffer,
 	.fill_vb2_buffer	= __fill_vb2_buffer,
 	.set_timestamp		= __set_timestamp,

From 34af67eb941ae5371110c9adbd5392c7a3aa841e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 2 May 2016 11:14:34 -0700
Subject: [PATCH 558/797] Revert "regulator: core: Fix nested locking of
 supplies"

This reverts commit b1999fa6e8145305a6c8bda30ea20783717708e6 which was
commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream.

It causes run-time breakage in the 4.4-stable tree and more patches are
needed to be applied first before this one in order to resolve the
issue.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: Mark Brown <broonie@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 7b94b8ee087c..c70017d5f74b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,14 +132,6 @@ static bool have_full_constraints(void)
 	return has_full_constraints || of_have_populated_dt();
 }
 
-static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
-{
-	if (rdev && rdev->supply)
-		return rdev->supply->rdev;
-
-	return NULL;
-}
-
 /**
  * regulator_lock_supply - lock a regulator and its supplies
  * @rdev:         regulator source
@@ -148,7 +140,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	for (i = 0; rdev->supply; rdev = rdev_get_supply(rdev), i++)
+	for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From f500da32a1663c1bb2587ff04be08d5220b3afca Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 27 Nov 2015 14:46:41 +0100
Subject: [PATCH 559/797] regulator: core: fix regulator_lock_supply regression

commit bb41897e38c53458a88b271f2fbcd905ee1f9584 upstream.

As noticed by Geert Uytterhoeven, my patch to avoid a harmless build warning
in regulator_lock_supply() was total crap and introduced a real bug:

> [ BUG: bad unlock balance detected! ]
> kworker/u4:0/6 is trying to release lock (&rdev->mutex) at:
> [<c0247b84>] regulator_set_voltage+0x38/0x50

we still lock the regulator supplies, but not the actual regulators,
so we are missing a lock, and the unlock is unbalanced.

This rectifies it by first locking the regulator device itself before
using the same loop as before to lock its supplies.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 716fec9d1965 ("[SUBMITTED] regulator: core: avoid unused variable warning")
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index c70017d5f74b..daffff83ced2 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -140,7 +140,8 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++)
+	mutex_lock(&rdev->mutex);
+	for (i = 1; rdev->supply; rdev = rdev->supply->rdev, i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From 29c9f634cb132107df3e4f07c8e48b35d04b527b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Dec 2015 15:51:52 +0000
Subject: [PATCH 560/797] regulator: core: Ensure we lock all regulators

commit 49a6bb7a1c0963f260e4b0dcc2c0e56ec65a28b2 upstream.

The latest workaround for the lockdep interface's not using the second
argument of mutex_lock_nested() changed the loop so that it missed
locking the last regulator, due to a thinko with the loop termination
condition exiting one regulator too soon.

Reported-by: Tyler Baker <tyler.baker@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index daffff83ced2..f71db02fcb71 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -141,7 +141,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 	int i;
 
 	mutex_lock(&rdev->mutex);
-	for (i = 1; rdev->supply; rdev = rdev->supply->rdev, i++)
+	for (i = 1; rdev; rdev = rdev->supply->rdev, i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From 5a58f809d731c23c0b898d2021903db8dee4466f Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 2 Dec 2015 16:54:50 +0100
Subject: [PATCH 561/797] regulator: core: Fix nested locking of supplies

commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream.

Commit fa731ac7ea04 ("regulator: core: avoid unused variable warning")
introduced a subtle change in how supplies are locked. Where previously
code was always locking the regulator of the current iteration, the new
implementation only locks the regulator if it has a supply. For any
given power tree that means that the root will never get locked.

On the other hand the regulator_unlock_supply() will still release all
the locks, which in turn causes the lock debugging code to warn about a
mutex being unlocked which wasn't locked.

Cc: Mark Brown <broonie@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Fixes: fa731ac7ea04 ("regulator: core: avoid unused variable warning")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f71db02fcb71..732ac71b82cd 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,6 +132,14 @@ static bool have_full_constraints(void)
 	return has_full_constraints || of_have_populated_dt();
 }
 
+static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
+{
+	if (rdev && rdev->supply)
+		return rdev->supply->rdev;
+
+	return NULL;
+}
+
 /**
  * regulator_lock_supply - lock a regulator and its supplies
  * @rdev:         regulator source
@@ -140,8 +148,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	mutex_lock(&rdev->mutex);
-	for (i = 1; rdev; rdev = rdev->supply->rdev, i++)
+	for (i = 0; rdev; rdev = rdev_get_supply(rdev), i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From 23a67ddd4636584816e2dc2c6393511d55944974 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 1 Feb 2016 15:11:28 +0100
Subject: [PATCH 562/797] locking/mcs: Fix mcs_spin_lock() ordering

commit 920c720aa5aa3900a7f1689228fdfc2580a91e7e upstream.

Similar to commit b4b29f94856a ("locking/osq: Fix ordering of node
initialisation in osq_lock") the use of xchg_acquire() is
fundamentally broken with MCS like constructs.

Furthermore, it turns out we rely on the global transitivity of this
operation because the unlock path observes the pointer with a
READ_ONCE(), not an smp_load_acquire().

This is non-critical because the MCS code isn't actually used and
mostly serves as documentation, a stepping stone to the more complex
things we've built on top of the idea.

Reported-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: 3552a07a9c4a ("locking/mcs: Use acquire/release semantics")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/locking/mcs_spinlock.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 5b9102a47ea5..c835270f0c2f 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg_acquire(lock, node);
+	/*
+	 * We rely on the full barrier with global transitivity implied by the
+	 * below xchg() to order the initialization stores above against any
+	 * observation of @node. And to provide the ACQUIRE ordering associated
+	 * with a LOCK primitive.
+	 */
+	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads

From 791e8462e48c6259375f59acf905b05884d648c3 Mon Sep 17 00:00:00 2001
From: Huibin Hong <huibin.hong@rock-chips.com>
Date: Wed, 24 Feb 2016 18:00:04 +0800
Subject: [PATCH 563/797] spi/rockchip: Make sure spi clk is on in
 rockchip_spi_set_cs

commit b920cc3191d7612f26f36ee494e05b5ffd9044c0 upstream.

rockchip_spi_set_cs() can be called by spi_setup(), and spi_setup() may
be called by a device driver after runtime suspend. At that point the
SPI clock is off, so when rockchip_spi_set_cs() accesses the SPI
registers it causes the CPU to block on some SoCs.

Fixes: 64e36824b32 ("spi/rockchip: add driver for Rockchip RK3xxx")
Signed-off-by: Huibin Hong <huibin.hong@rock-chips.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-rockchip.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 79a8bc4f6cec..035767c02072 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -265,7 +265,10 @@ static inline u32 rx_max(struct rockchip_spi *rs)
 static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	u32 ser;
-	struct rockchip_spi *rs = spi_master_get_devdata(spi->master);
+	struct spi_master *master = spi->master;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
+
+	pm_runtime_get_sync(rs->dev);
 
 	ser = readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & SER_MASK;
 
@@ -290,6 +293,8 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 		ser &= ~(1 << spi->chip_select);
 
 	writel_relaxed(ser, rs->regs + ROCKCHIP_SPI_SER);
+
+	pm_runtime_put_sync(rs->dev);
 }
 
 static int rockchip_spi_prepare_message(struct spi_master *master,

From fd66dc5d5123672069acba5cb5856e7b0aa9e6d4 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Wed, 9 Mar 2016 03:21:29 +0200
Subject: [PATCH 564/797] irqchip/sunxi-nmi: Fix error check of
 of_io_request_and_map()

commit cfe199afefe6201e998ddc07102fc1fdb55f196c upstream.

The of_io_request_and_map() returns a valid pointer in iomem region or
ERR_PTR(), check for NULL always fails and may cause a NULL pointer
dereference on error path.

Fixes: 0e841b04c829 ("irqchip/sunxi-nmi: Switch to of_io_request_and_map() from of_iomap()")
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Chen-Yu Tsai <wens@csie.org>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1457486489-10189-1-git-send-email-vz@mleia.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-sunxi-nmi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 4ef178078e5b..1254e98f6b57 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -154,9 +154,9 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node,
 
 	gc = irq_get_domain_generic_chip(domain, 0);
 	gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (!gc->reg_base) {
+	if (IS_ERR(gc->reg_base)) {
 		pr_err("unable to map resource\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(gc->reg_base);
 		goto fail_irqd_remove;
 	}
 

From e60711a18bccf26d0159b263dfb05b374532caf5 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Wed, 9 Mar 2016 03:21:40 +0200
Subject: [PATCH 565/797] irqchip/mxs: Fix error check of
 of_io_request_and_map()

commit edf8fcdc6b254236be005851af35ea5e826e7e09 upstream.

The of_io_request_and_map() returns a valid pointer in iomem region or
ERR_PTR(), check for NULL always fails and may cause a NULL pointer
dereference on error path.

Fixes: 25e34b44313b ("irqchip/mxs: Prepare driver for hardware with different offsets")
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Oleksij Rempel <linux@rempel-privat.de>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1457486500-10237-1-git-send-email-vz@mleia.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-mxs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index efe50845939d..17304705f2cf 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -183,7 +183,7 @@ static void __iomem * __init icoll_init_iobase(struct device_node *np)
 	void __iomem *icoll_base;
 
 	icoll_base = of_io_request_and_map(np, 0, np->name);
-	if (!icoll_base)
+	if (IS_ERR(icoll_base))
 		panic("%s: unable to map resource", np->full_name);
 	return icoll_base;
 }

From 72291d619e2928556db1d446f0b4afff330276a7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Feb 2016 15:53:11 +0100
Subject: [PATCH 566/797] regulator: s5m8767: fix get_register() error handling

commit e07ff9434167981c993a26d2edbbcb8e13801dbb upstream.

The s5m8767_pmic_probe() function calls s5m8767_get_register() to
read data without checking the return code, which produces a compile-time
warning when that data is accessed:

drivers/regulator/s5m8767.c: In function 's5m8767_pmic_probe':
drivers/regulator/s5m8767.c:924:7: error: 'enable_reg' may be used uninitialized in this function [-Werror=maybe-uninitialized]
drivers/regulator/s5m8767.c:944:30: error: 'enable_val' may be used uninitialized in this function [-Werror=maybe-uninitialized]

This changes the s5m8767_get_register() function to return a -EINVAL
not just for an invalid register number but also for an invalid
regulator number, as both would result in returning uninitialized
data. The s5m8767_pmic_probe() function is then changed accordingly
to fail on a read error, as all the other callers of s5m8767_get_register()
already do.

In practice this probably cannot happen, as we don't call
s5m8767_get_register() with invalid arguments, but the gcc
warning seems valid in principle, in terms of writing safe
error checking.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 9c4c60554acf ("regulator: s5m8767: Convert to use regulator_[enable|disable|is_enabled]_regmap")
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/s5m8767.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index 58f5d3b8e981..27343e1c43ef 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -202,9 +202,10 @@ static int s5m8767_get_register(struct s5m8767_info *s5m8767, int reg_id,
 		}
 	}
 
-	if (i < s5m8767->num_regulators)
-		*enable_ctrl =
-		s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT;
+	if (i >= s5m8767->num_regulators)
+		return -EINVAL;
+
+	*enable_ctrl = s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT;
 
 	return 0;
 }
@@ -937,8 +938,12 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
 			else
 				regulators[id].vsel_mask = 0xff;
 
-			s5m8767_get_register(s5m8767, id, &enable_reg,
+			ret = s5m8767_get_register(s5m8767, id, &enable_reg,
 					     &enable_val);
+			if (ret) {
+				dev_err(s5m8767->dev, "error reading registers\n");
+				return ret;
+			}
 			regulators[id].enable_reg = enable_reg;
 			regulators[id].enable_mask = S5M8767_ENCTRL_MASK;
 			regulators[id].enable_val = enable_val;

From aea6995abbe4e13299d8606679cfe1b92fa45932 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 15 Mar 2016 14:53:29 -0700
Subject: [PATCH 567/797] paride: make 'verbose' parameter an 'int' again

commit dec63a4dec2d6d01346fd5d96062e67c0636852b upstream.

gcc-6.0 found an ancient bug in the paride driver, which had a
"module_param(verbose, bool, 0);" since before 2.6.12, but actually uses
it to accept '0', '1' or '2' as arguments:

  drivers/block/paride/pd.c: In function 'pd_init_dev_parms':
  drivers/block/paride/pd.c:298:29: warning: comparison of constant '1' with boolean expression is always false [-Wbool-compare]
   #define DBMSG(msg) ((verbose>1)?(msg):NULL)

In 2012, Rusty did a cleanup patch that also changed the type of the
variable to 'bool', which introduced what is now a gcc warning.

This changes the type back to 'int' and adapts the module_param() line
instead, so it should work as documented in case anyone ever cares about
running the ancient driver with debugging.

Fixes: 90ab5ee94171 ("module_param: make bool parameters really bool (drivers & misc)")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/paride/pd.c | 4 ++--
 drivers/block/paride/pt.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 562b5a4ca7b7..78a39f736c64 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -126,7 +126,7 @@
 */
 #include <linux/types.h>
 
-static bool verbose = 0;
+static int verbose = 0;
 static int major = PD_MAJOR;
 static char *name = PD_NAME;
 static int cluster = 64;
@@ -161,7 +161,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
 static DEFINE_MUTEX(pd_mutex);
 static DEFINE_SPINLOCK(pd_lock);
 
-module_param(verbose, bool, 0);
+module_param(verbose, int, 0);
 module_param(major, int, 0);
 module_param(name, charp, 0);
 module_param(cluster, int, 0);
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 1740d75e8a32..216a94fed5b4 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -117,7 +117,7 @@
 
 */
 
-static bool verbose = 0;
+static int verbose = 0;
 static int major = PT_MAJOR;
 static char *name = PT_NAME;
 static int disable = 0;
@@ -152,7 +152,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
 
 #include <asm/uaccess.h>
 
-module_param(verbose, bool, 0);
+module_param(verbose, int, 0);
 module_param(major, int, 0);
 module_param(name, charp, 0);
 module_param_array(drive0, int, NULL, 0);

From 9d9fefc8283a2bda6c7daeaab3b310965cb35f85 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 Jan 2016 16:57:23 +0100
Subject: [PATCH 568/797] scsi_dh: force modular build if SCSI is a module

commit 0c994c03c926d26ce48e6bbabbbe60366044fcae upstream.

When the scsi_dh core was moved into the scsi core module,
CONFIG_SCSI_DH became a 'bool' option, and now anything depending on it
can be built-in even when CONFIG_SCSI=m. This of course cannot link
successfully:

drivers/scsi/built-in.o: In function `rdac_init':
scsi_dh_alua.c:(.init.text+0x14): undefined reference to `scsi_register_device_handler'
scsi_dh_alua.c:(.init.text+0x64): undefined reference to `scsi_unregister_device_handler'
drivers/scsi/built-in.o: In function `alua_init':
scsi_dh_alua.c:(.init.text+0xb0): undefined reference to `scsi_register_device_handler'

As a workaround, this adds an extra dependency on CONFIG_SCSI, so
Kconfig can figure out whether built-in is allowed or not.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 086b91d052eb ("scsi_dh: integrate into the core SCSI code")
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/device_handler/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig
index e5647d59224f..0b331c9c0a8f 100644
--- a/drivers/scsi/device_handler/Kconfig
+++ b/drivers/scsi/device_handler/Kconfig
@@ -13,13 +13,13 @@ menuconfig SCSI_DH
 
 config SCSI_DH_RDAC
 	tristate "LSI RDAC Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	If you have a LSI RDAC select y. Otherwise, say N.
 
 config SCSI_DH_HP_SW
 	tristate "HP/COMPAQ MSA Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	If you have a HP/COMPAQ MSA device that requires START_STOP to
 	be sent to start it and cannot upgrade the firmware then select y.
@@ -27,13 +27,13 @@ config SCSI_DH_HP_SW
 
 config SCSI_DH_EMC
 	tristate "EMC CLARiiON Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	If you have a EMC CLARiiON select y. Otherwise, say N.
 
 config SCSI_DH_ALUA
 	tristate "SPC-3 ALUA Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	  SCSI Device handler for generic SPC-3 Asymmetric Logical Unit
 	  Access (ALUA).

From 0658e8c5e8cc02f12f9ae3df1f3b87ee7283bb24 Mon Sep 17 00:00:00 2001
From: Sushaanth Srirangapathi <sushaanth.s@ti.com>
Date: Mon, 29 Feb 2016 18:42:19 +0530
Subject: [PATCH 569/797] fbdev: da8xx-fb: fix videomodes of lcd panels

commit 713fced8d10fa1c759c8fb6bf9aaa681bae68cad upstream.

Commit 028cd86b794f4a ("video: da8xx-fb: fix the polarities of the
hsync/vsync pulse") fixes polarities of HSYNC/VSYNC pulse but
forgot to update known_lcd_panels[] which had sync values
according to old logic. This breaks LCD at least on DA850 EVM.

This patch fixes this issue and I have tested this for panel
"Sharp_LK043T1DG01" using DA850 EVM board.

Fixes: 028cd86b794f4a ("video: da8xx-fb: fix the polarities of the hsync/vsync pulse")
Signed-off-by: Sushaanth Srirangapathi <sushaanth.s@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/da8xx-fb.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index 0081725c6b5b..d00510029c93 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -209,8 +209,7 @@ static struct fb_videomode known_lcd_panels[] = {
 		.lower_margin   = 2,
 		.hsync_len      = 0,
 		.vsync_len      = 0,
-		.sync           = FB_SYNC_CLK_INVERT |
-			FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.sync           = FB_SYNC_CLK_INVERT,
 	},
 	/* Sharp LK043T1DG01 */
 	[1] = {
@@ -224,7 +223,7 @@ static struct fb_videomode known_lcd_panels[] = {
 		.lower_margin   = 2,
 		.hsync_len      = 41,
 		.vsync_len      = 10,
-		.sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.sync           = 0,
 		.flag           = 0,
 	},
 	[2] = {
@@ -239,7 +238,7 @@ static struct fb_videomode known_lcd_panels[] = {
 		.lower_margin   = 10,
 		.hsync_len      = 10,
 		.vsync_len      = 10,
-		.sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.sync           = 0,
 		.flag           = 0,
 	},
 	[3] = {

From 81b3a56ed84b0f2c1e2ff75ee2e05d5d4cd2462b Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Wed, 17 Feb 2016 14:46:59 +0100
Subject: [PATCH 570/797] lib/mpi: Endianness fix

commit 3ee0cb5fb5eea2110db1b5cb7f67029b7be8a376 upstream.

The limbs are integers in the host endianness, so we can't simply
iterate over the individual bytes. The current code happens to work on
little-endian, because the order of the limbs in the MPI array is the
same as the order of the bytes in each limb, but it breaks on
big-endian.

Fixes: 0f74fbf77d45 ("MPI: Fix mpi_read_buffer")
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/mpi/mpicoder.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 3db76b8c1115..e00ff00e861c 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -128,6 +128,23 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 }
 EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
 
+static int count_lzeros(MPI a)
+{
+	mpi_limb_t alimb;
+	int i, lzeros = 0;
+
+	for (i = a->nlimbs - 1; i >= 0; i--) {
+		alimb = a->d[i];
+		if (alimb == 0) {
+			lzeros += sizeof(mpi_limb_t);
+		} else {
+			lzeros += count_leading_zeros(alimb) / 8;
+			break;
+		}
+	}
+	return lzeros;
+}
+
 /**
  * mpi_read_buffer() - read MPI to a bufer provided by user (msb first)
  *
@@ -146,7 +163,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 	uint8_t *p;
 	mpi_limb_t alimb;
 	unsigned int n = mpi_get_size(a);
-	int i, lzeros = 0;
+	int i, lzeros;
 
 	if (buf_len < n || !buf || !nbytes)
 		return -EINVAL;
@@ -154,14 +171,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 	if (sign)
 		*sign = a->sign;
 
-	p = (void *)&a->d[a->nlimbs] - 1;
-
-	for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) {
-		if (!*p)
-			lzeros++;
-		else
-			break;
-	}
+	lzeros = count_lzeros(a);
 
 	p = buf;
 	*nbytes = n - lzeros;
@@ -343,7 +353,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
 	u8 *p, *p2;
 	mpi_limb_t alimb, alimb2;
 	unsigned int n = mpi_get_size(a);
-	int i, x, y = 0, lzeros = 0, buf_len;
+	int i, x, y = 0, lzeros, buf_len;
 
 	if (!nbytes || *nbytes < n)
 		return -EINVAL;
@@ -351,14 +361,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
 	if (sign)
 		*sign = a->sign;
 
-	p = (void *)&a->d[a->nlimbs] - 1;
-
-	for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) {
-		if (!*p)
-			lzeros++;
-		else
-			break;
-	}
+	lzeros = count_lzeros(a);
 
 	*nbytes = n - lzeros;
 	buf_len = sgl->length;

From 01c8261c5ec46183e14dec7df335ee88bb037e30 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 14 Dec 2015 14:29:23 +0000
Subject: [PATCH 571/797] misc/bmp085: Enable building as a module

commit 50e6315dba721cbc24ccd6d7b299f1782f210a98 upstream.

Commit 985087dbcb02 'misc: add support for bmp18x chips to the bmp085
driver' changed the BMP085 config symbol to a boolean.  I see no
reason why the shared code cannot be built as a module, so change it
back to tristate.

Fixes: 985087dbcb02 ("misc: add support for bmp18x chips to the bmp085 driver")
Cc: Eric Andersson <eric.andersson@unixphere.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 22892c701c63..4bf7d50b1bc7 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -439,7 +439,7 @@ config ARM_CHARLCD
 	  still useful.
 
 config BMP085
-	bool
+	tristate
 	depends on SYSFS
 
 config BMP085_I2C

From 4f8e29e7547be52fa24b0cdc7cf69baac9d82328 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 19 Oct 2015 14:19:01 +0300
Subject: [PATCH 572/797] misc: mic/scif: fix wrap around tests

commit 7b64dbf849abdd7e769820e25120758f956a7f13 upstream.

Signed integer overflow is undefined.  Also I added a check for
"(offset < 0)" in scif_unregister() because that makes it match the
other conditions and because I didn't want to subtract a negative.

Fixes: ba612aa8b487 ('misc: mic: SCIF memory registration and unregistration')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mic/scif/scif_rma.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 8310b4dbff06..6a451bd65bf3 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -1511,7 +1511,7 @@ off_t scif_register_pinned_pages(scif_epd_t epd,
 	if ((map_flags & SCIF_MAP_FIXED) &&
 	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
 	    (offset < 0) ||
-	    (offset + (off_t)len < offset)))
+	    (len > LONG_MAX - offset)))
 		return -EINVAL;
 
 	might_sleep();
@@ -1614,7 +1614,7 @@ off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
 	if ((map_flags & SCIF_MAP_FIXED) &&
 	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
 	    (offset < 0) ||
-	    (offset + (off_t)len < offset)))
+	    (len > LONG_MAX - offset)))
 		return -EINVAL;
 
 	/* Unsupported protection requested */
@@ -1732,7 +1732,8 @@ scif_unregister(scif_epd_t epd, off_t offset, size_t len)
 
 	/* Offset is not page aligned or offset+len wraps around */
 	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
-	    (offset + (off_t)len < offset))
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset))
 		return -EINVAL;
 
 	err = scif_verify_epd(ep);

From dabe14168a929839b4757f496ad6886489078997 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 15 Feb 2016 10:21:53 +0530
Subject: [PATCH 573/797] PM / OPP: Initialize u_volt_min/max to a valid value

commit c88c395f4a6485f23f81e385c79945d68bcd5c5d upstream.

We kept u_volt_min/max initialized to 0, when only the target voltage is
present in DT, instead of the target/min/max triplet.

This didn't go well with the regulator framework, as on a few calls the
min voltage was set to target and max was set to 0 and so resulted in a
kernel crash like below:

kernel BUG at ../drivers/regulator/core.c:216!

[<c0684af4>] (regulator_check_voltage) from [<c06857ac>] (regulator_set_voltage_unlocked+0x58/0x230)
[<c06857ac>] (regulator_set_voltage_unlocked) from [<c06859ac>] (regulator_set_voltage+0x28/0x54)
[<c06859ac>] (regulator_set_voltage) from [<c0775b28>] (_set_opp_voltage+0x30/0x98)
[<c0775b28>] (_set_opp_voltage) from [<c0776630>] (dev_pm_opp_set_rate+0xf0/0x28c)
[<c0776630>] (dev_pm_opp_set_rate) from [<c096f784>] (__cpufreq_driver_target+0x184/0x2b4)
[<c096f784>] (__cpufreq_driver_target) from [<c0973760>] (dbs_check_cpu+0x1b0/0x1f4)
[<c0973760>] (dbs_check_cpu) from [<c0973f30>] (cpufreq_governor_dbs+0x324/0x5c4)
[<c0973f30>] (cpufreq_governor_dbs) from [<c0970958>] (__cpufreq_governor+0xe4/0x1ec)
[<c0970958>] (__cpufreq_governor) from [<c09711e0>] (cpufreq_init_policy+0x64/0x8c)
[<c09711e0>] (cpufreq_init_policy) from [<c09718cc>] (cpufreq_online+0x2fc/0x708)
[<c09718cc>] (cpufreq_online) from [<c0765ff0>] (subsys_interface_register+0x94/0xd8)
[<c0765ff0>] (subsys_interface_register) from [<c0970530>] (cpufreq_register_driver+0x14c/0x19c)
[<c0970530>] (cpufreq_register_driver) from [<c09746dc>] (dt_cpufreq_probe+0x70/0xec)
[<c09746dc>] (dt_cpufreq_probe) from [<c076907c>] (platform_drv_probe+0x4c/0xb0)
[<c076907c>] (platform_drv_probe) from [<c07678e0>] (driver_probe_device+0x214/0x2c0)
[<c07678e0>] (driver_probe_device) from [<c0767a18>] (__driver_attach+0x8c/0x90)
[<c0767a18>] (__driver_attach) from [<c0765c2c>] (bus_for_each_dev+0x68/0x9c)
[<c0765c2c>] (bus_for_each_dev) from [<c0766d78>] (bus_add_driver+0x1a0/0x218)
[<c0766d78>] (bus_add_driver) from [<c076810c>] (driver_register+0x78/0xf8)
[<c076810c>] (driver_register) from [<c0301d74>] (do_one_initcall+0x90/0x1d8)
[<c0301d74>] (do_one_initcall) from [<c1100e14>] (kernel_init_freeable+0x15c/0x1fc)
[<c1100e14>] (kernel_init_freeable) from [<c0b27a0c>] (kernel_init+0x8/0xf0)
[<c0b27a0c>] (kernel_init) from [<c0307d78>] (ret_from_fork+0x14/0x3c)
Code: e1550004 baffffeb e3a00000 e8bd8070 (e7f001f2)

Fix that by initializing u_volt_min/max to the target voltage in such cases.

Reported-and-tested-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/power/opp/core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index b8e76f75073b..f8580900c273 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -809,8 +809,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
 	}
 
 	opp->u_volt = microvolt[0];
-	opp->u_volt_min = microvolt[1];
-	opp->u_volt_max = microvolt[2];
+
+	if (count == 1) {
+		opp->u_volt_min = opp->u_volt;
+		opp->u_volt_max = opp->u_volt;
+	} else {
+		opp->u_volt_min = microvolt[1];
+		opp->u_volt_max = microvolt[2];
+	}
 
 	if (!of_property_read_u32(opp->np, "opp-microamp", &val))
 		opp->u_amp = val;

From cab4c949ade11ac67f69bc05124c9d6ffef31917 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 4 Mar 2016 10:55:14 +0000
Subject: [PATCH 574/797] PM / Domains: Fix removal of a subdomain

commit beda5fc1ff9b527059290a97b672d2ee0eb7b92f upstream.

Commit 30e7a65b3fdb (PM / Domains: Ensure subdomain is not in use
before removing) added a test to ensure that a subdomain is not a
master to another subdomain and that no devices are using the subdomain
before removing. This change incorrectly used the "slave_links" list to
determine if the subdomain is a master to another subdomain, where it
should have been using the "master_links" list instead. The
"slave_links" list will never be empty for a subdomain and so a
subdomain can never be removed. Fix this by testing if the
"master_links" list is empty instead.

Fixes: 30e7a65b3fdb (PM / Domains: Ensure subdomain is not in use before removing)
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/power/domain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 65f50eccd49b..a48824deabc5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1381,7 +1381,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 
 	mutex_lock(&genpd->lock);
 
-	if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
+	if (!list_empty(&subdomain->master_links) || subdomain->device_count) {
 		pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
 			subdomain->name);
 		ret = -EBUSY;

From 1392ec2a303a53512ad16dcf1ab31e77b08c52d9 Mon Sep 17 00:00:00 2001
From: Alexander Kochetkov <al.kochet@gmail.com>
Date: Sun, 6 Mar 2016 12:43:57 +0300
Subject: [PATCH 575/797] rtc: hym8563: fix invalid year calculation

commit d5861262210067fc01b2fb4f7af2fd85a3453f15 upstream.

Year field must be in BCD format, according to
hym8563 datasheet.

Due to the bug year 2016 became 2010.

Fixes: dcaf03849352 ("rtc: add hym8563 rtc-driver")
Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-hym8563.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c
index 097325d96db5..b1b4746a0eab 100644
--- a/drivers/rtc/rtc-hym8563.c
+++ b/drivers/rtc/rtc-hym8563.c
@@ -144,7 +144,7 @@ static int hym8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	 * it does not seem to carry it over a subsequent write/read.
 	 * So we'll limit ourself to 100 years, starting at 2000 for now.
 	 */
-	buf[6] = tm->tm_year - 100;
+	buf[6] = bin2bcd(tm->tm_year - 100);
 
 	/*
 	 * CTL1 only contains TEST-mode bits apart from stop,

From 041f2ca3ff039ebe10a48a775b29bf3fa7c993fa Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 1 Mar 2016 09:50:01 +0100
Subject: [PATCH 576/797] rtc: vr41xx: Wire up alarm_irq_enable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit a25f4a95ec3cded34c1250364eba704c5e4fdac4 upstream.

drivers/rtc/rtc-vr41xx.c:229: warning: ‘vr41xx_rtc_alarm_irq_enable’ defined but not used

Apparently the conversion to alarm_irq_enable forgot to wire up the
callback.

Fixes: 16380c153a69c378 ("RTC: Convert rtc drivers to use the alarm_irq_enable method")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-vr41xx.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index f64c282275b3..e1b86bb01062 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -272,12 +272,13 @@ static irqreturn_t rtclong1_interrupt(int irq, void *dev_id)
 }
 
 static const struct rtc_class_ops vr41xx_rtc_ops = {
-	.release	= vr41xx_rtc_release,
-	.ioctl		= vr41xx_rtc_ioctl,
-	.read_time	= vr41xx_rtc_read_time,
-	.set_time	= vr41xx_rtc_set_time,
-	.read_alarm	= vr41xx_rtc_read_alarm,
-	.set_alarm	= vr41xx_rtc_set_alarm,
+	.release		= vr41xx_rtc_release,
+	.ioctl			= vr41xx_rtc_ioctl,
+	.read_time		= vr41xx_rtc_read_time,
+	.set_time		= vr41xx_rtc_set_time,
+	.read_alarm		= vr41xx_rtc_read_alarm,
+	.set_alarm		= vr41xx_rtc_set_alarm,
+	.alarm_irq_enable	= vr41xx_rtc_alarm_irq_enable,
 };
 
 static int rtc_probe(struct platform_device *pdev)

From 83fe55baa881f1d7fed118b4f4ec3bf325d7285a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 2 Mar 2016 13:07:45 +0300
Subject: [PATCH 577/797] rtc: ds1685: passing bogus values to irq_restore

commit 8c09b9fdecab1f4a289f07b46e2ad174b6641928 upstream.

We call spin_unlock_irqrestore() with "flags" set to zero instead of the
value from spin_lock_irqsave().

Fixes: aaaf5fbf56f1 ('rtc: add driver for DS1685 family of real time clocks')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-ds1685.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
index 05a51ef52703..d5c1b057a739 100644
--- a/drivers/rtc/rtc-ds1685.c
+++ b/drivers/rtc/rtc-ds1685.c
@@ -187,9 +187,9 @@ ds1685_rtc_end_data_access(struct ds1685_priv *rtc)
  * Only use this where you are certain another lock will not be held.
  */
 static inline void
-ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long flags)
+ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long *flags)
 {
-	spin_lock_irqsave(&rtc->lock, flags);
+	spin_lock_irqsave(&rtc->lock, *flags);
 	ds1685_rtc_switch_to_bank1(rtc);
 }
 
@@ -1304,7 +1304,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev,
 {
 	struct ds1685_priv *rtc = dev_get_drvdata(dev);
 	u8 reg = 0, bit = 0, tmp;
-	unsigned long flags = 0;
+	unsigned long flags;
 	long int val = 0;
 	const struct ds1685_rtc_ctrl_regs *reg_info =
 		ds1685_rtc_sysfs_ctrl_regs_lookup(attr->attr.name);
@@ -1325,7 +1325,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev,
 	bit = reg_info->bit;
 
 	/* Safe to spinlock during a write. */
-	ds1685_rtc_begin_ctrl_access(rtc, flags);
+	ds1685_rtc_begin_ctrl_access(rtc, &flags);
 	tmp = rtc->read(rtc, reg);
 	rtc->write(rtc, reg, (val ? (tmp | bit) : (tmp & ~(bit))));
 	ds1685_rtc_end_ctrl_access(rtc, flags);

From 11dd7f9a1ed13794cc77bd144b3aa9dd17c4030f Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 21 Jan 2016 13:24:21 +0100
Subject: [PATCH 578/797] rtc: rx8025: remove rv8803 id

commit aaa3cee5deffa28415a6e1852c5afae0f5d210e2 upstream.

The rv8803 has its own driver that should be used. Remove its id from
the rx8025 driver.

Fixes: b1f9d790b59dc04f8813a49a92ddd8651770ffee
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-rx8025.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index bd911bafb809..17341feadad1 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -65,7 +65,6 @@
 
 static const struct i2c_device_id rx8025_id[] = {
 	{ "rx8025", 0 },
-	{ "rv8803", 1 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, rx8025_id);

From f55131145b8d16d942ddb363c3e1b72cf4775384 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 4 Feb 2016 09:26:35 +0900
Subject: [PATCH 579/797] rtc: max77686: Properly handle regmap_irq_get_virq()
 error code

commit fb166ba1d7f0a662f7332f4ff660a0d6f4d76915 upstream.

The regmap_irq_get_virq() can return 0 or -EINVAL in error conditions
but driver checked only for value of 0.

This could lead to a cast of -EINVAL to an unsigned int used as an
interrupt number for devm_request_threaded_irq(). Although this is not
yet fatal (devm_request_threaded_irq() will just fail with -EINVAL), it
might be misleading when diagnosing errors.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 6f1c1e71d933 ("mfd: max77686: Convert to use regmap_irq")
Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-max77686.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 7184a0eda793..725dccae24e7 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -465,7 +465,7 @@ static int max77686_rtc_probe(struct platform_device *pdev)
 
 	info->virq = regmap_irq_get_virq(max77686->rtc_irq_data,
 					 MAX77686_RTCIRQ_RTCA1);
-	if (!info->virq) {
+	if (info->virq <= 0) {
 		ret = -ENXIO;
 		goto err_rtc;
 	}

From ab2c82dcd6cdb4d4871b151123f1523f2285a27e Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 22 Feb 2016 10:20:24 +0100
Subject: [PATCH 580/797] drivers/misc/ad525x_dpot: AD5274 fix RDAC read back
 errors

commit f3df53e4d70b5736368a8fe8aa1bb70c1cb1f577 upstream.

Fix RDAC read back errors caused by a typo. Value must shift by 2.

Fixes: a4bd394956f2 ("drivers/misc/ad525x_dpot.c: new features")
Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ad525x_dpot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
index 15e88078ba1e..f1a0b99f5a9a 100644
--- a/drivers/misc/ad525x_dpot.c
+++ b/drivers/misc/ad525x_dpot.c
@@ -216,7 +216,7 @@ static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg)
 			 */
 			value = swab16(value);
 
-			if (dpot->uid == DPOT_UID(AD5271_ID))
+			if (dpot->uid == DPOT_UID(AD5274_ID))
 				value = value >> 2;
 		return value;
 	default:

From 36828721fbbe4b53a53d62847b476f314123c819 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 17 Feb 2016 10:57:19 -0300
Subject: [PATCH 581/797] perf evlist: Reference count the cpu and thread maps
 at set_maps()

commit a55e5663761366fb883f6f25375dd68bc958b9db upstream.

We were dropping the reference we possibly held but not obtaining one
for the new maps, which we will drop at perf_evlist__delete(), fix it.

This was caught by Steven Noonan in some of the machines which would
produce this output when caught by glibc debug mechanisms:

  $ sudo perf test 21
  21: Test object code reading                                 :***
  Error in `perf': corrupted double-linked list: 0x00000000023ffcd0 ***
  ======= Backtrace: =========
  /usr/lib/libc.so.6(+0x72055)[0x7f25be0f3055]
  /usr/lib/libc.so.6(+0x779b6)[0x7f25be0f89b6]
  /usr/lib/libc.so.6(+0x7a0ed)[0x7f25be0fb0ed]
  /usr/lib/libc.so.6(__libc_calloc+0xba)[0x7f25be0fceda]
  perf(parse_events_lex_init_extra+0x38)[0x4cfff8]
  perf(parse_events+0x55)[0x4a0615]
  perf(perf_evlist__config+0xcf)[0x4eeb2f]
  perf[0x479f82]
  perf(test__code_reading+0x1e)[0x47ad4e]
  perf(cmd_test+0x5dd)[0x46452d]
  perf[0x47f4e3]
  perf(main+0x603)[0x42c723]
  /usr/lib/libc.so.6(__libc_start_main+0xf0)[0x7f25be0a1610]
  perf(_start+0x29)[0x42c859]

Further investigation using valgrind led to the reference count imbalance fixed
in this patch.

Reported-and-Tested-by: Steven Noonan <steven@uplinklabs.net>
Report-Link: http://lkml.kernel.org/r/CAKbGBLjC2Dx5vshxyGmQkcD+VwiAQLbHoXA9i7kvRB2-2opHZQ@mail.gmail.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object")
Link: http://lkml.kernel.org/n/tip-j0u1bdhr47sa511sgg76kb8h@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/evlist.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d1392194a9a9..b4b96120fc3b 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1211,12 +1211,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
 	 */
 	if (cpus != evlist->cpus) {
 		cpu_map__put(evlist->cpus);
-		evlist->cpus = cpus;
+		evlist->cpus = cpu_map__get(cpus);
 	}
 
 	if (threads != evlist->threads) {
 		thread_map__put(evlist->threads);
-		evlist->threads = threads;
+		evlist->threads = thread_map__get(threads);
 	}
 
 	perf_evlist__propagate_maps(evlist);

From 8481fdf6dc13e3a2b3f7e75e414b5eab3771329d Mon Sep 17 00:00:00 2001
From: Karol Herbst <nouveau@karolherbst.de>
Date: Thu, 3 Mar 2016 02:03:11 +0100
Subject: [PATCH 582/797] x86/mm/kmmio: Fix mmiotrace for hugepages

commit cfa52c0cfa4d727aa3e457bf29aeff296c528a08 upstream.

Because Linux might use bigger pages than the 4K pages to handle those mmio
ioremaps, the kmmio code shouldn't rely on the page id as it currently does.

Using the memory address instead of the page id lets us look up how big the
page is and what its base address is, so that we won't get a page fault
within the same page twice anymore.

Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Karol Herbst <nouveau@karolherbst.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: linux-mm@kvack.org
Cc: linux-x86_64@vger.kernel.org
Cc: nouveau@lists.freedesktop.org
Cc: pq@iki.fi
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/1456966991-6861-1-git-send-email-nouveau@karolherbst.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/kmmio.c | 88 ++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34ed632..ddb2244b06a1 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
 struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
-	unsigned long page; /* location of the fault page */
+	unsigned long addr; /* the requested address */
 	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
@@ -70,9 +70,16 @@ unsigned int kmmio_count;
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
 {
-	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+
+	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
 }
 
 /* Accessed per-cpu */
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 }
 
 /* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 {
 	struct list_head *head;
 	struct kmmio_fault_page *f;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
 
-	page &= PAGE_MASK;
-	head = kmmio_page_list(page);
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+	head = kmmio_page_list(addr);
 	list_for_each_entry_rcu(f, head, list) {
-		if (f->page == page)
+		if (f->addr == addr)
 			return f;
 	}
 	return NULL;
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(f->page, &level);
+	pte_t *pte = lookup_address(f->addr, &level);
 
 	if (!pte) {
-		pr_err("no pte for page 0x%08lx\n", f->page);
+		pr_err("no pte for addr 0x%08lx\n", f->addr);
 		return -1;
 	}
 
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 		return -1;
 	}
 
-	__flush_tlb_one(f->page);
+	__flush_tlb_one(f->addr);
 	return 0;
 }
 
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 	int ret;
 	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
 	if (f->armed) {
-		pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
-			   f->page, f->count, !!f->old_presence);
+		pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			   f->addr, f->count, !!f->old_presence);
 	}
 	ret = clear_page_presence(f, true);
-	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
-		  f->page);
+	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+		  f->addr);
 	f->armed = true;
 	return ret;
 }
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
 	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
-			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
 	f->armed = false;
 }
 
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	struct kmmio_context *ctx;
 	struct kmmio_fault_page *faultpage;
 	int ret = 0; /* default to fault not handled */
+	unsigned long page_base = addr;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+	if (!pte)
+		return -EINVAL;
+	page_base &= page_level_mask(l);
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	preempt_disable();
 	rcu_read_lock();
 
-	faultpage = get_kmmio_fault_page(addr);
+	faultpage = get_kmmio_fault_page(page_base);
 	if (!faultpage) {
 		/*
 		 * Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 
 	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
-		if (addr == ctx->addr) {
+		if (page_base == ctx->addr) {
 			/*
 			 * A second fault on the same page means some other
 			 * condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	ctx->active++;
 
 	ctx->fpage = faultpage;
-	ctx->probe = get_kmmio_probe(addr);
+	ctx->probe = get_kmmio_probe(page_base);
 	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
-	ctx->addr = addr;
+	ctx->addr = page_base;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 }
 
 /* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (f) {
 		if (!f->count)
 			arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
 		return -1;
 
 	f->count = 1;
-	f->page = page;
+	f->addr = addr;
 
 	if (arm_kmmio_fault_page(f)) {
 		kfree(f);
 		return -1;
 	}
 
-	list_add_rcu(&f->list, kmmio_page_list(f->page));
+	list_add_rcu(&f->list, kmmio_page_list(f->addr));
 
 	return 0;
 }
 
 /* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
 				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (!f)
 		return;
 
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	int ret = 0;
 	unsigned long size = 0;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	unsigned int l;
+	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	if (get_kmmio_probe(p->addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
 		if (add_kmmio_fault_page(p->addr + size))
 			pr_err("Unable to set page fault.\n");
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 out:
 	spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
+	unsigned int l;
+	pte_t *pte;
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte)
+		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
 		release_kmmio_fault_page(p->addr + size, &release_list);
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
 	kmmio_count--;

From c745297ba18668f8a760493d7d769563c818616e Mon Sep 17 00:00:00 2001
From: Eryu Guan <guaneryu@gmail.com>
Date: Sat, 12 Mar 2016 21:40:32 -0500
Subject: [PATCH 583/797] ext4: fix NULL pointer dereference in
 ext4_mark_inode_dirty()

commit 5e1021f2b6dff1a86a468a1424d59faae2bc63c1 upstream.

ext4_reserve_inode_write() in ext4_mark_inode_dirty() could fail on
error (e.g. EIO) and iloc.bh can be NULL in this case. But the error is
ignored in the following "if" condition and ext4_expand_extra_isize()
might be called with NULL iloc.bh set, which triggers NULL pointer
dereference.

This is uncovered by commit 8b4953e13f4c ("ext4: reserve code points for
the project quota feature"), which enlarges the ext4_inode size, and
run the following script on new kernel but with old mke2fs:

  #!/bin/bash
  mnt=/mnt/ext4
  devname=ext4-error
  dev=/dev/mapper/$devname
  fsimg=/home/fs.img

  trap cleanup 0 1 2 3 9 15

  cleanup()
  {
          umount $mnt >/dev/null 2>&1
          dmsetup remove $devname
          losetup -d $backend_dev
          rm -f $fsimg
          exit 0
  }

  rm -f $fsimg
  fallocate -l 1g $fsimg
  backend_dev=`losetup -f --show $fsimg`
  devsize=`blockdev --getsz $backend_dev`

  good_tab="0 $devsize linear $backend_dev 0"
  error_tab="0 $devsize error $backend_dev 0"

  dmsetup create $devname --table "$good_tab"

  mkfs -t ext4 $dev
  mount -t ext4 -o errors=continue,strictatime $dev $mnt

  dmsetup load $devname --table "$error_tab" && dmsetup resume $devname
  echo 3 > /proc/sys/vm/drop_caches
  ls -l $mnt
  exit 0

[ Patch changed to simplify the function a tiny bit. -- Ted ]

Signed-off-by: Eryu Guan <guaneryu@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 06bda0361e7c..547600556bb9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5109,6 +5109,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 	might_sleep();
 	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		return err;
 	if (ext4_handle_valid(handle) &&
 	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
 	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5139,9 +5141,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 			}
 		}
 	}
-	if (!err)
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-	return err;
+	return ext4_mark_iloc_dirty(handle, inode, &iloc);
 }
 
 /*

From 447ea0a34b78213dd668bf7d0a2b7add1c5675e6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 5 Jan 2016 19:36:37 +0100
Subject: [PATCH 584/797] serial: sh-sci: Remove cpufreq notifier to fix
 crash/deadlock

commit ff1cab374ad98f4b9f408525ca9c08992b4ed784 upstream.

The BSP team noticed that there is spin/mutex lock issue on sh-sci when
CPUFREQ is used.  The issue is that the notifier function may call
mutex_lock() while the spinlock is held, which can lead to a BUG().
This may happen if CPUFREQ is changed while another CPU calls
clk_get_rate().

Taking the spinlock was added to the notifier function in commit
e552de2413edad1a ("sh-sci: add platform device private data"), to
protect the list of serial ports against modification during traversal.
At that time the Common Clock Framework didn't exist yet, and
clk_get_rate() just returned clk->rate without taking a mutex.
Note that since commit d535a2305facf9b4 ("serial: sh-sci: Require a
device per port mapping."), there's no longer a list of serial ports to
traverse, and taking the spinlock became superfluous.

To fix the issue, just remove the cpufreq notifier:
  1. The notifier doesn't work correctly: all it does is update stored
     clock rates; it does not update the divider in the hardware.
     The divider will only be updated when calling sci_set_termios().
     I believe this was broken back in 2004, when the old
     drivers/char/sh-sci.c driver (where the notifier did update the
     divider) was replaced by drivers/serial/sh-sci.c (where the
     notifier just updated port->uartclk).
     Cfr. full-history-linux commits 6f8deaef2e9675d9 ("[PATCH] sh: port
     sh-sci driver to the new API") and 3f73fe878dc9210a ("[PATCH]
     Remove old sh-sci driver").
  2. On modern SoCs, the sh-sci parent clock rate is no longer related
     to the CPU clock rate anyway, so using a cpufreq notifier is
     futile.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 39 -------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 51c7507b0444..63a06ab6ba03 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -38,7 +38,6 @@
 #include <linux/major.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -116,8 +115,6 @@ struct sci_port {
 	struct timer_list		rx_timer;
 	unsigned int			rx_timeout;
 #endif
-
-	struct notifier_block		freq_transition;
 };
 
 #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS
@@ -1606,29 +1603,6 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	return ret;
 }
 
-/*
- * Here we define a transition notifier so that we can update all of our
- * ports' baud rate when the peripheral clock changes.
- */
-static int sci_notifier(struct notifier_block *self,
-			unsigned long phase, void *p)
-{
-	struct sci_port *sci_port;
-	unsigned long flags;
-
-	sci_port = container_of(self, struct sci_port, freq_transition);
-
-	if (phase == CPUFREQ_POSTCHANGE) {
-		struct uart_port *port = &sci_port->port;
-
-		spin_lock_irqsave(&port->lock, flags);
-		port->uartclk = clk_get_rate(sci_port->iclk);
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-
-	return NOTIFY_OK;
-}
-
 static const struct sci_irq_desc {
 	const char	*desc;
 	irq_handler_t	handler;
@@ -2559,9 +2533,6 @@ static int sci_remove(struct platform_device *dev)
 {
 	struct sci_port *port = platform_get_drvdata(dev);
 
-	cpufreq_unregister_notifier(&port->freq_transition,
-				    CPUFREQ_TRANSITION_NOTIFIER);
-
 	uart_remove_one_port(&sci_uart_driver, &port->port);
 
 	sci_cleanup_single(port);
@@ -2714,16 +2685,6 @@ static int sci_probe(struct platform_device *dev)
 	if (ret)
 		return ret;
 
-	sp->freq_transition.notifier_call = sci_notifier;
-
-	ret = cpufreq_register_notifier(&sp->freq_transition,
-					CPUFREQ_TRANSITION_NOTIFIER);
-	if (unlikely(ret < 0)) {
-		uart_remove_one_port(&sci_uart_driver, &sp->port);
-		sci_cleanup_single(sp);
-		return ret;
-	}
-
 #ifdef CONFIG_SH_STANDARD_BIOS
 	sh_bios_gdb_detach();
 #endif

From 87261de30fd8e5ebd441cd2f05df73ddf04c2af2 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Wed, 3 Feb 2016 14:26:46 +0100
Subject: [PATCH 585/797] mtd: spi-nor: remove micron_quad_enable()

commit 3b5394a3ccffbfa1d1d448d48742853a862822c4 upstream.

This patch removes the micron_quad_enable() function, which forces the Quad
SPI mode. However, once this mode is enabled, the Micron memory expects ALL
commands to use the SPI 4-4-4 protocol. Hence a failure does occur when
calling spi_nor_wait_till_ready() right after the update of the Enhanced
Volatile Configuration Register (EVCR) in micron_quad_enable(), as
the SPI controller driver is not aware of the protocol change.

Since there is almost no performance increase using Fast Read 4-4-4
commands instead of Fast Read 1-1-4 commands, we rather keep on using the
Extended SPI mode than enabling the Quad SPI mode.

Let's take the example of the pretty standard use of 8 dummy cycles during
Fast Read operations on 64KB erase sectors:

Fast Read 1-1-4 requires 8 cycles for the command, then 24 cycles for the
3byte address followed by 8 dummy clock cycles and finally 65536*2 cycles
for the read data; so 131112 clock cycles.

On the other hand the Fast Read 4-4-4 would require 2 cycles for the
command, then 6 cycles for the 3byte address followed by 8 dummy clock
cycles and finally 65536*2 cycles for the read data. So 131088 clock
cycles. The theoretical bandwidth increase is 0.0%.

Now using Fast Read operations on 512byte pages:
Fast Read 1-1-4 needs 8+24+8+(512*2) = 1064 clock cycles whereas Fast
Read 4-4-4 would require 2+6+8+(512*2) = 1040 clock cycles. Hence the
theoretical bandwidth increase is 2.3%.
Consecutive reads of non-sequential pages are not a relevant use case, so
the Quad SPI mode is not worth it.

mtd_speedtest seems to confirm these figures.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Fixes: 548cd3ab54da ("mtd: spi-nor: Add quad I/O support for Micron SPI NOR")
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/spi-nor/spi-nor.c | 46 +----------------------------------
 1 file changed, 1 insertion(+), 45 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 32477c4eb421..37e4135ab213 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1067,45 +1067,6 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int micron_quad_enable(struct spi_nor *nor)
-{
-	int ret;
-	u8 val;
-
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-
-	write_enable(nor);
-
-	/* set EVCR, enable quad I/O */
-	nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON;
-	ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error while writing EVCR register\n");
-		return ret;
-	}
-
-	ret = spi_nor_wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* read EVCR and check it */
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-	if (val & EVCR_QUAD_EN_MICRON) {
-		dev_err(nor->dev, "Micron EVCR Quad bit not clear\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 {
 	int status;
@@ -1119,12 +1080,7 @@ static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 		}
 		return status;
 	case SNOR_MFR_MICRON:
-		status = micron_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Micron quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
+		return 0;
 	default:
 		status = spansion_quad_enable(nor);
 		if (status) {

From 67891850e58b0190005441cf4f54da957fed8e01 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 24 Feb 2016 16:07:23 -0800
Subject: [PATCH 586/797] mtd: brcmnand: Fix v7.1 register offsets

commit d267aefc54a28efc5bda7f009598dc83b5f98734 upstream.

The BRCMNAND controller revision 7.1 is almost 100% compatible with the
previous v6.0 register offset layout, except for the Correctable Error
Reporting Threshold registers. Fix this by adding another table with the
correct offsets for CORR_THRESHOLD and CORR_THRESHOLD_EXT.

Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/brcmnand/brcmnand.c | 34 +++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index 12c6190c6e33..4a07ba1195b5 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -309,6 +309,36 @@ static const u16 brcmnand_regs_v60[] = {
 	[BRCMNAND_FC_BASE]		= 0x400,
 };
 
+/* BRCMNAND v7.1 */
+static const u16 brcmnand_regs_v71[] = {
+	[BRCMNAND_CMD_START]		=  0x04,
+	[BRCMNAND_CMD_EXT_ADDRESS]	=  0x08,
+	[BRCMNAND_CMD_ADDRESS]		=  0x0c,
+	[BRCMNAND_INTFC_STATUS]		=  0x14,
+	[BRCMNAND_CS_SELECT]		=  0x18,
+	[BRCMNAND_CS_XOR]		=  0x1c,
+	[BRCMNAND_LL_OP]		=  0x20,
+	[BRCMNAND_CS0_BASE]		=  0x50,
+	[BRCMNAND_CS1_BASE]		=     0,
+	[BRCMNAND_CORR_THRESHOLD]	=  0xdc,
+	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xe0,
+	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
+	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
+	[BRCMNAND_CORR_ADDR]		= 0x110,
+	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
+	[BRCMNAND_UNCORR_ADDR]		= 0x118,
+	[BRCMNAND_SEMAPHORE]		= 0x150,
+	[BRCMNAND_ID]			= 0x194,
+	[BRCMNAND_ID_EXT]		= 0x198,
+	[BRCMNAND_LL_RDATA]		= 0x19c,
+	[BRCMNAND_OOB_READ_BASE]	= 0x200,
+	[BRCMNAND_OOB_READ_10_BASE]	=     0,
+	[BRCMNAND_OOB_WRITE_BASE]	= 0x280,
+	[BRCMNAND_OOB_WRITE_10_BASE]	=     0,
+	[BRCMNAND_FC_BASE]		= 0x400,
+};
+
 enum brcmnand_cs_reg {
 	BRCMNAND_CS_CFG_EXT = 0,
 	BRCMNAND_CS_CFG,
@@ -404,7 +434,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl)
 	}
 
 	/* Register offsets */
-	if (ctrl->nand_version >= 0x0600)
+	if (ctrl->nand_version >= 0x0701)
+		ctrl->reg_offsets = brcmnand_regs_v71;
+	else if (ctrl->nand_version >= 0x0600)
 		ctrl->reg_offsets = brcmnand_regs_v60;
 	else if (ctrl->nand_version >= 0x0500)
 		ctrl->reg_offsets = brcmnand_regs_v50;

From 35bfb7949b7f23cdbfda12d83a8038f640b49141 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ezequiel=20Garc=C3=ADa?= <ezequiel@vanguardiasur.com.ar>
Date: Fri, 1 Apr 2016 18:29:23 -0300
Subject: [PATCH 587/797] mtd: nand: Drop mtd.owner requirement in nand_scan

commit 20c07a5bf094198ff2382aa5e7c930b3c9807792 upstream.

Since commit 807f16d4db95 ("mtd: core: set some defaults
when dev.parent is set"), it's now legal for drivers
to call nand_scan and nand_scan_ident without setting
mtd.owner.

Drop the check and while at it remove the BUG() abuse.

Fixes: 807f16d4db95 ("mtd: core: set some defaults when dev.parent is set")
Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
[Brian: editorial note - while commit 807f16d4db95 wasn't explicitly
    broken, some follow-up commits in the v4.4 release broke a few
    drivers, since they would hit this BUG() if they used nand_scan()
    and were built as modules]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/nand_base.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 3ff583f165cd..ce7b2cab5762 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3979,7 +3979,6 @@ static int nand_dt_init(struct mtd_info *mtd, struct nand_chip *chip,
  * This is the first phase of the normal nand_scan() function. It reads the
  * flash ID and sets up MTD fields accordingly.
  *
- * The mtd->owner field must be set to the module of the caller.
  */
 int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 		    struct nand_flash_dev *table)
@@ -4403,19 +4402,12 @@ EXPORT_SYMBOL(nand_scan_tail);
  *
  * This fills out all the uninitialized function pointers with the defaults.
  * The flash ID is read and the mtd/chip structures are filled with the
- * appropriate values. The mtd->owner field must be set to the module of the
- * caller.
+ * appropriate values.
  */
 int nand_scan(struct mtd_info *mtd, int maxchips)
 {
 	int ret;
 
-	/* Many callers got this wrong, so check for it for a while... */
-	if (!mtd->owner && caller_is_module()) {
-		pr_crit("%s called with NULL mtd->owner!\n", __func__);
-		BUG();
-	}
-
 	ret = nand_scan_ident(mtd, maxchips, NULL);
 	if (!ret)
 		ret = nand_scan_tail(mtd);

From c3173539ec17901391863321e1eaf335b0029a09 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 21 Jan 2016 19:50:09 -0300
Subject: [PATCH 588/797] perf hists browser: Only offer symbol scripting when
 a symbol is under the cursor

commit c221acb0f970d3b80d72c812cda19c121acf5d52 upstream.

When this feature was introduced a check was made if there was a
resolved symbol under the cursor, it got lost in commit ea7cd5923309
("perf hists browser: Split popup menu actions - part 2"), reinstate it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: ea7cd5923309 ("perf hists browser: Split popup menu actions - part 2")
Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org
[ Carved out from a  larger patch ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/ui/browsers/hists.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 81def6c3f24b..3900386a3629 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2059,10 +2059,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			 *
 			 * See hist_browser__show_entry.
 			 */
-			nr_options += add_script_opt(browser,
-						     &actions[nr_options],
-						     &options[nr_options],
-						     NULL, browser->selection->sym);
+			if (sort__has_sym && browser->selection->sym) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     NULL, browser->selection->sym);
+			}
 		}
 		nr_options += add_script_opt(browser, &actions[nr_options],
 					     &options[nr_options], NULL, NULL);

From dcfdb38c41385bc3cb7be295eb5b9d4b00ea1177 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20=C5=9Alusarz?= <marcin.slusarz@gmail.com>
Date: Tue, 19 Jan 2016 20:03:03 +0100
Subject: [PATCH 589/797] perf tools: handle spaces in file names obtained from
 /proc/pid/maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 89fee59b504f86925894fcc9ba79d5c933842f93 upstream.

Steam frequently puts game binaries in folders with spaces.

Note: "(deleted)" markers are now treated as part of the file name.

Signed-off-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Fixes: 6064803313ba ("perf tools: Use sscanf for parsing /proc/pid/maps")
Link: http://lkml.kernel.org/r/20160119190303.GA17579@marcin-Inspiron-7720
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8b10621b415c..956187bf1a85 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -274,7 +274,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		strcpy(execname, "");
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
 		       &event->mmap2.start, &event->mmap2.len, prot,
 		       &event->mmap2.pgoff, &event->mmap2.maj,
 		       &event->mmap2.min,

From d7b60bafb195dd349821e422b1dbc8b897eeb368 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 7 Mar 2016 16:44:44 -0300
Subject: [PATCH 590/797] perf stat: Document --detailed option

commit f594bae08183fb6b57db55387794ece3e1edf6f6 upstream.

I'm surprised this remained undocumented since at least 2011. And it is
actually a very useful switch, as Steve and I came to realize recently.

Add the text from

  2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events")

which added the incrementing aspect to -d.

Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Davidlohr Bueso <dbueso@suse.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mel Gorman <mgorman@suse.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events")
Link: http://lkml.kernel.org/r/1457347294-32546-1-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/Documentation/perf-stat.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4e074a660826..90c3558c2c12 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -62,6 +62,14 @@ OPTIONS
 --scale::
 	scale/normalize counter values
 
+-d::
+--detailed::
+	print more detailed statistics, can be specified up to 3 times
+
+	   -d:          detailed events, L1 and LLC data cache
+        -d -d:     more detailed events, dTLB and iTLB events
+     -d -d -d:     very detailed events, adding prefetch events
+
 -r::
 --repeat=<n>::
 	repeat command and print average + stddev (max: 100). 0 means forever.

From 0b680de452570274716c2c9990903acea525f0d0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:28:03 -0500
Subject: [PATCH 591/797] ext4: fix races between page faults and hole punching

commit ea3d7209ca01da209cda6f0dea8be9cc4b7a933b upstream.

Currently, page faults and hole punching are completely unsynchronized.
This can result in page fault faulting in a page into a range that we
are punching after truncate_pagecache_range() has been called and thus
we can end up with a page mapped to disk blocks that will be shortly
freed. Filesystem corruption will shortly follow. Note that the same
race is avoided for truncate by checking page fault offset against
i_size but there isn't similar mechanism available for punching holes.

Fix the problem by creating new rw semaphore i_mmap_sem in inode and
grab it for writing over truncate, hole punching, and other functions
removing blocks from extent tree and for read over page faults. We
cannot easily use i_data_sem for this since that ranks below transaction
start and we need something ranking above it so that it can be held over
the whole truncate / hole punching operation. Also remove various
workarounds we had in the code to reduce race window when page fault
could have created pages with stale mapping information.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h     | 10 +++++++
 fs/ext4/extents.c  | 54 ++++++++++++++++++++-----------------
 fs/ext4/file.c     | 66 +++++++++++++++++++++++++++++++++++++++-------
 fs/ext4/inode.c    | 36 ++++++++++++++++++-------
 fs/ext4/super.c    |  1 +
 fs/ext4/truncate.h |  2 ++
 6 files changed, 127 insertions(+), 42 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d4156e1c128d..89df9f55595b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -933,6 +933,15 @@ struct ext4_inode_info {
 	 * by other means, so we have i_data_sem.
 	 */
 	struct rw_semaphore i_data_sem;
+	/*
+	 * i_mmap_sem is for serializing page faults with truncate / punch hole
+	 * operations. We have to make sure that new page cannot be faulted in
+	 * a section of the inode that is being punched. We cannot easily use
+	 * i_data_sem for this since we need protection for the whole punch
+	 * operation and i_data_sem ranks below transaction start so we have
+	 * to occasionally drop it.
+	 */
+	struct rw_semaphore i_mmap_sem;
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
 
@@ -2507,6 +2516,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 551353b1b17a..5be9ca5a8a7a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4770,7 +4770,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	int partial_begin, partial_end;
 	loff_t start, end;
 	ext4_lblk_t lblk;
-	struct address_space *mapping = inode->i_mapping;
 	unsigned int blkbits = inode->i_blkbits;
 
 	trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4784,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 			return ret;
 	}
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		ret = filemap_write_and_wait_range(mapping, offset,
-						   offset + len - 1);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * Round up offset. This is not fallocate, we neet to zero out
 	 * blocks, so convert interior block aligned part of the range to
@@ -4856,16 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 			  EXT4_EX_NOCACHE);
 
-		/* Now release the pages and zero block aligned part of pages*/
-		truncate_pagecache_range(inode, start, end - 1);
-		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-
 		/* Wait all existing dio workers, newcomers will block on i_mutex */
 		ext4_inode_block_unlocked_dio(inode);
 		inode_dio_wait(inode);
 
+		/*
+		 * Prevent page faults from reinstantiating pages we have
+		 * released from page cache.
+		 */
+		down_write(&EXT4_I(inode)->i_mmap_sem);
+		/* Now release the pages and zero block aligned part of pages */
+		truncate_pagecache_range(inode, start, end - 1);
+		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+
 		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 					     flags, mode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
 		if (ret)
 			goto out_dio;
 	}
@@ -5524,17 +5518,22 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, ioffset);
-
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	truncate_pagecache(inode, ioffset);
+
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
-		goto out_dio;
+		goto out_mmap;
 	}
 
 	down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,7 +5572,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
 	ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
@@ -5660,17 +5660,22 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, ioffset);
-
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	truncate_pagecache(inode, ioffset);
+
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
-		goto out_dio;
+		goto out_mmap;
 	}
 
 	/* Expand file to avoid data loss if there is error while shifting */
@@ -5741,7 +5746,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
 	ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 113837e7ba98..0d24ebcd7c9e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int result;
 	handle_t *handle = NULL;
-	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 						EXT4_DATA_TRANS_BLOCKS(sb));
-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
@@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (write) {
 		if (!IS_ERR(handle))
 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
@@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 				ext4_chunk_trans_blocks(inode,
 							PMD_SIZE / PAGE_SIZE));
-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
@@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		if (!IS_ERR(handle))
 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-				ext4_end_io_unwritten);
+	int err;
+	struct inode *inode = file_inode(vma->vm_file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = __dax_mkwrite(vma, vmf, ext4_get_block_dax,
+			    ext4_end_io_unwritten);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(inode->i_sb);
+
+	return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+	int ret = VM_FAULT_NOPAGE;
+	loff_t size;
+
+	sb_start_pagefault(sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (vmf->pgoff >= size)
+		ret = VM_FAULT_SIGBUS;
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(sb);
+
+	return ret;
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
 	.fault		= ext4_dax_fault,
 	.pmd_fault	= ext4_dax_pmd_fault,
 	.page_mkwrite	= ext4_dax_mkwrite,
-	.pfn_mkwrite	= dax_pfn_mkwrite,
+	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops	ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-	.fault		= filemap_fault,
+	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite   = ext4_page_mkwrite,
 };
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 547600556bb9..214e30a3ef9e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3651,6 +3651,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
 	}
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
 	first_block_offset = round_up(offset, sb->s_blocksize);
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
@@ -3659,10 +3668,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 		truncate_pagecache_range(inode, first_block_offset,
 					 last_block_offset);
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	inode_dio_wait(inode);
-
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		credits = ext4_writepage_trans_blocks(inode);
 	else
@@ -3708,16 +3713,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 
-	/* Now release the pages again to reduce race window */
-	if (last_block_offset > first_block_offset)
-		truncate_pagecache_range(inode, first_block_offset,
-					 last_block_offset);
-
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 out_stop:
 	ext4_journal_stop(handle);
 out_dio:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
@@ -4851,6 +4852,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			} else
 				ext4_wait_for_tail_page_commit(inode);
 		}
+		down_write(&EXT4_I(inode)->i_mmap_sem);
 		/*
 		 * Truncate pagecache after we've waited for commit
 		 * in data=journal mode to make pages freeable.
@@ -4858,6 +4860,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		truncate_pagecache(inode, inode->i_size);
 		if (shrink)
 			ext4_truncate(inode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
 	}
 
 	if (!rc) {
@@ -5306,6 +5309,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	sb_start_pagefault(inode->i_sb);
 	file_update_time(vma->vm_file);
+
+	down_read(&EXT4_I(inode)->i_mmap_sem);
 	/* Delalloc case is easy... */
 	if (test_opt(inode->i_sb, DELALLOC) &&
 	    !ext4_should_journal_data(inode) &&
@@ -5375,6 +5380,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 out_ret:
 	ret = block_page_mkwrite_return(ret);
 out:
+	up_read(&EXT4_I(inode)->i_mmap_sem);
 	sb_end_pagefault(inode->i_sb);
 	return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	int err;
+
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = filemap_fault(vma, vmf);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+
+	return err;
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba1cf0bf2f81..852c26806af2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -958,6 +958,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&ei->i_orphan);
 	init_rwsem(&ei->xattr_sem);
 	init_rwsem(&ei->i_data_sem);
+	init_rwsem(&ei->i_mmap_sem);
 	inode_init_once(&ei->vfs_inode);
 }
 
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba6670d99..c70d06a383e2 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -10,8 +10,10 @@
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+	down_write(&EXT4_I(inode)->i_mmap_sem);
 	truncate_inode_pages(inode->i_mapping, inode->i_size);
 	ext4_truncate(inode);
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 }
 
 /*

From e096ade68c13011ba6548a542c1fc00e14555f5c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:29:17 -0500
Subject: [PATCH 592/797] ext4: move unlocked dio protection from
 ext4_alloc_file_blocks()

commit 17048e8a083fec7ad841d88ef0812707fbc7e39f upstream.

Currently ext4_alloc_file_blocks() was handling protection against
unlocked DIO. However we now need to sometimes call it under i_mmap_sem
and sometimes not and DIO protection ranks above it (although strictly
speaking this cannot currently create any deadlocks). Also
ext4_zero_range() was actually getting & releasing unlocked DIO
protection twice in some cases. Luckily it didn't introduce any real bug
but it was a land mine waiting to be stepped on.  So move DIO protection
out from ext4_alloc_file_blocks() into the two callsites.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/extents.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5be9ca5a8a7a..65b5ada2833f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 	if (len <= EXT_UNWRITTEN_MAX_LEN)
 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	inode_dio_wait(inode);
-
 	/*
 	 * credits to insert 1 extent into extent tree
 	 */
@@ -4752,8 +4748,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 		goto retry;
 	}
 
-	ext4_inode_resume_unlocked_dio(inode);
-
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -4827,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	/* Preallocate the range including the unaligned edges */
 	if (partial_begin || partial_end) {
 		ret = ext4_alloc_file_blocks(file,
@@ -4835,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 				 round_down(offset, 1 << blkbits)) >> blkbits,
 				new_size, flags, mode);
 		if (ret)
-			goto out_mutex;
+			goto out_dio;
 
 	}
 
@@ -4844,10 +4842,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 			  EXT4_EX_NOCACHE);
 
-		/* Wait all existing dio workers, newcomers will block on i_mutex */
-		ext4_inode_block_unlocked_dio(inode);
-		inode_dio_wait(inode);
-
 		/*
 		 * Prevent page faults from reinstantiating pages we have
 		 * released from page cache.
@@ -4992,8 +4986,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 			goto out;
 	}
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 				     flags, mode);
+	ext4_inode_resume_unlocked_dio(inode);
 	if (ret)
 		goto out;
 

From 1f7b7e9a4ba3d60af27c78a149743d269e6fb848 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:31:11 -0500
Subject: [PATCH 593/797] ext4: fix races between buffered IO and collapse /
 insert range

commit 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 upstream.

Current code implementing FALLOC_FL_COLLAPSE_RANGE and
FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page
faults. If buffered write or write via mmap manages to squeeze between
filemap_write_and_wait_range() and truncate_pagecache() in the fallocate
implementations, the written data is simply discarded by
truncate_pagecache() although it should have been shifted.

Fix the problem by moving filemap_write_and_wait_range() call inside
i_mutex and i_mmap_sem. That way we are protected against races with
both buffered writes and page faults.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/extents.c | 59 +++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 65b5ada2833f..4b105c96df08 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5487,21 +5487,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	/*
-	 * Need to round down offset to be aligned with page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-					   LLONG_MAX);
-	if (ret)
-		return ret;
-
-	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
-
 	/*
 	 * There is no need to overlap collapse range with EOF, in which case
 	 * it is effectively a truncate operation
@@ -5526,6 +5512,27 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down offset to be aligned with page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/*
+	 * Write tail of the last page before removed range since it will get
+	 * removed from the page cache below.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+	if (ret)
+		goto out_mmap;
+	/*
+	 * Write data that will be shifted to preserve them when discarding
+	 * page cache below. We are also protected from pages becoming dirty
+	 * by i_mmap_sem.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+					   LLONG_MAX);
+	if (ret)
+		goto out_mmap;
 	truncate_pagecache(inode, ioffset);
 
 	credits = ext4_writepage_trans_blocks(inode);
@@ -5626,21 +5633,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	/*
-	 * Need to round down to align start offset to page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-			LLONG_MAX);
-	if (ret)
-		return ret;
-
-	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
-
 	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ret = -EOPNOTSUPP;
@@ -5668,6 +5661,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down to align start offset to page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/* Write out all dirty pages */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+			LLONG_MAX);
+	if (ret)
+		goto out_mmap;
 	truncate_pagecache(inode, ioffset);
 
 	credits = ext4_writepage_trans_blocks(inode);

From 21228341bf17496062b0e6a1b37265f6bcf5c8f3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:34:49 -0500
Subject: [PATCH 594/797] ext4: fix races of writeback with punch hole and zero
 range

commit 011278485ecc3cd2a3954b5d4c73101d919bf1fa upstream.

When doing delayed allocation, update of on-disk inode size is postponed
until IO submission time. However hole punch or zero range fallocate
calls can end up discarding the tail page cache page and thus on-disk
inode size would never be properly updated.

Make sure the on-disk inode size is updated before truncating page
cache.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h    |  3 +++
 fs/ext4/extents.c |  5 +++++
 fs/ext4/inode.c   | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 89df9f55595b..b7e921d207fb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2881,6 +2881,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
 	return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len);
+
 struct ext4_group_info {
 	unsigned long   bb_state;
 	struct rb_root  bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4b105c96df08..3578b25fccfd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4847,6 +4847,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		 * released from page cache.
 		 */
 		down_write(&EXT4_I(inode)->i_mmap_sem);
+		ret = ext4_update_disksize_before_punch(inode, offset, len);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			goto out_dio;
+		}
 		/* Now release the pages and zero block aligned part of pages */
 		truncate_pagecache_range(inode, start, end - 1);
 		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 214e30a3ef9e..e31d762eedce 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3586,6 +3586,35 @@ int ext4_can_truncate(struct inode *inode)
 	return 0;
 }
 
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len)
+{
+	handle_t *handle;
+	loff_t size = i_size_read(inode);
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	if (offset > size || offset + len < size)
+		return 0;
+
+	if (EXT4_I(inode)->i_disksize >= size)
+		return 0;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ext4_update_i_disksize(inode, size);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+	return 0;
+}
+
 /*
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
  * associated with the given offset and length
@@ -3664,9 +3693,13 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
 	/* Now release the pages and zero block aligned part of pages*/
-	if (last_block_offset > first_block_offset)
+	if (last_block_offset > first_block_offset) {
+		ret = ext4_update_disksize_before_punch(inode, offset, length);
+		if (ret)
+			goto out_dio;
 		truncate_pagecache_range(inode, first_block_offset,
 					 last_block_offset);
+	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		credits = ext4_writepage_trans_blocks(inode);

From 40cab474b47b2fc87911812687e83f8cd21aea1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Fri, 19 Feb 2016 10:35:39 -0800
Subject: [PATCH 595/797] ARM: OMAP3: Add cpuidle parameters table for omap3430
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 98f42221501353067251fbf11e732707dbb68ce3 upstream.

Based on CPU type choose generic omap3 or omap3430 specific cpuidle
parameters. Parameters for omap3430 were measured on Nokia N900 device and
added by commit 5a1b1d3a9efa ("OMAP3: RX-51: Pass cpu idle parameters")
which were later removed by commit 231900afba52 ("ARM: OMAP3: cpuidle -
remove rx51 cpuidle parameters table") due to huge code complexity.

This patch brings cpuidle parameters for omap3430 devices again, but uses
simple condition based on CPU type.

Fixes: 231900afba52 ("ARM: OMAP3: cpuidle - remove rx51 cpuidle
parameters table")
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/cpuidle34xx.c | 69 ++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index aa7b379e2661..2a3db0bd9e15 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -34,6 +34,7 @@
 #include "pm.h"
 #include "control.h"
 #include "common.h"
+#include "soc.h"
 
 /* Mach specific information to be recorded in the C-state driver_data */
 struct omap3_idle_statedata {
@@ -315,6 +316,69 @@ static struct cpuidle_driver omap3_idle_driver = {
 	.safe_state_index = 0,
 };
 
+/*
+ * Numbers based on measurements made in October 2009 for PM optimized kernel
+ * with CPU freq enabled on device Nokia N900. Assumes OPP2 (main idle OPP,
+ * and worst case latencies).
+ */
+static struct cpuidle_driver omap3430_idle_driver = {
+	.name             = "omap3430_idle",
+	.owner            = THIS_MODULE,
+	.states = {
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 110 + 162,
+			.target_residency = 5,
+			.name		  = "C1",
+			.desc		  = "MPU ON + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 106 + 180,
+			.target_residency = 309,
+			.name		  = "C2",
+			.desc		  = "MPU ON + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 107 + 410,
+			.target_residency = 46057,
+			.name		  = "C3",
+			.desc		  = "MPU RET + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 121 + 3374,
+			.target_residency = 46057,
+			.name		  = "C4",
+			.desc		  = "MPU OFF + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 855 + 1146,
+			.target_residency = 46057,
+			.name		  = "C5",
+			.desc		  = "MPU RET + CORE RET",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 7580 + 4134,
+			.target_residency = 484329,
+			.name		  = "C6",
+			.desc		  = "MPU OFF + CORE RET",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 7505 + 15274,
+			.target_residency = 484329,
+			.name		  = "C7",
+			.desc		  = "MPU OFF + CORE OFF",
+		},
+	},
+	.state_count = ARRAY_SIZE(omap3_idle_data),
+	.safe_state_index = 0,
+};
+
 /* Public functions */
 
 /**
@@ -333,5 +397,8 @@ int __init omap3_idle_init(void)
 	if (!mpu_pd || !core_pd || !per_pd || !cam_pd)
 		return -ENODEV;
 
-	return cpuidle_register(&omap3_idle_driver, NULL);
+	if (cpu_is_omap3430())
+		return cpuidle_register(&omap3430_idle_driver, NULL);
+	else
+		return cpuidle_register(&omap3_idle_driver, NULL);
 }

From 159c52e15f95712dd22aa5d64b17a79a7fd8f939 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 28 Nov 2015 23:56:47 +0100
Subject: [PATCH 596/797] ARM: prima2: always enable reset controller

commit ef2b1d777d643af227a22309d8b79898b90b123c upstream.

The atlas7 clock controller driver registers a reset controller
for itself, which causes a link error when the subsystem is
disabled:

drivers/built-in.o: In function `atlas7_clk_init':
drivers/clk/sirf/clk-atlas7.c:1681: undefined reference to `reset_controller_register'

As the clk driver does not have a Kconfig symbol for itself
but is always built-in when the platform is enabled, we have
to ensure that the reset controller subsystem is also built-in
in this case.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
Fixes: 301c5d29402e ("clk: sirf: add CSR atlas7 clk and reset support")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-prima2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig
index 9ab8932403e5..56e55fd37d13 100644
--- a/arch/arm/mach-prima2/Kconfig
+++ b/arch/arm/mach-prima2/Kconfig
@@ -1,6 +1,7 @@
 menuconfig ARCH_SIRF
 	bool "CSR SiRF" if ARCH_MULTI_V7
 	select ARCH_HAS_RESET_CONTROLLER
+	select RESET_CONTROLLER
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_IRQ_CHIP
 	select NO_IOPORT_MAP

From abc48d066b7b5063db56f4a81e367c84b9582882 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 29 Jan 2016 15:50:38 +0100
Subject: [PATCH 597/797] ARM: EXYNOS: select THERMAL_OF

commit dc7eb9d589e595954792cc192bcbb92932e5c2ff upstream.

We cannot select a symbol that has disabled dependencies, so
we get a warning if we ever enable EXYNOS_THERMAL without
also turning on THERMAL_OF:

warning: (ARCH_EXYNOS) selects EXYNOS_THERMAL which has unmet direct dependencies (THERMAL && (ARCH_EXYNOS || COMPILE_TEST) && THERMAL_OF)

This adds another 'select' in the platform code to avoid that
case. Alternatively, we could decide to not select EXYNOS_THERMAL
here and instead make it a user option.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: f87e6bd3f740 ("thermal: exynos: Add the dependency of CONFIG_THERMAL_OF instead of CONFIG_OF")
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-exynos/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 3a10f1a8317a..bfd8bb371477 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -26,6 +26,7 @@ menuconfig ARCH_EXYNOS
 	select S5P_DEV_MFC
 	select SRAM
 	select THERMAL
+	select THERMAL_OF
 	select MFD_SYSCON
 	help
 	  Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5)

From ea075ae7f00c6416b12d68abf29b6a57a15b3916 Mon Sep 17 00:00:00 2001
From: Lior Amsalem <alior@marvell.com>
Date: Wed, 10 Feb 2016 17:29:15 +0100
Subject: [PATCH 598/797] ARM: dts: armada-375: use armada-370-sata for SATA

commit b3a7f31eb7375633cd6a742f19488fc5a4208b36 upstream.

The Armada 375 has the same SATA IP as Armada 370 and Armada XP, which
requires the PHY speed to be set in the LP_PHY_CTL register for SATA
hotplug to work.

Therefore, this commit updates the compatible string used to describe
the SATA IP in Armada 375 from marvell,orion-sata to
marvell,armada-370-sata.

Fixes: 4de59085091f753d08c8429d756b46756ab94665 ("ARM: mvebu: add Device Tree description of the Armada 375 SoC")
Signed-off-by: Lior Amsalem <alior@marvell.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/armada-375.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
index 7ccce7529b0c..cc952cf8ec30 100644
--- a/arch/arm/boot/dts/armada-375.dtsi
+++ b/arch/arm/boot/dts/armada-375.dtsi
@@ -529,7 +529,7 @@ crypto@90000 {
 			};
 
 			sata@a0000 {
-				compatible = "marvell,orion-sata";
+				compatible = "marvell,armada-370-sata";
 				reg = <0xa0000 0x5000>;
 				interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&gateclk 14>, <&gateclk 20>;

From eb7f1c5fb5c8e888ca8b728e17e71426ea809590 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sat, 13 Feb 2016 00:49:20 +0100
Subject: [PATCH 599/797] ARM: dts: pxa: fix dma engine node to pxa3xx-nand

commit 07c6b2d01d351f0512ed7145625265e435ab3240 upstream.

Since the switch from mmp_pdma to pxa_dma driver for pxa architectures,
the pxa_dma requires 2 arguments, namely the requestor line and the
requested priority.

Fix the only left device node which was still passing only one argument,
making the pxa3xx-nand driver misbehave in a device-tree configuration,
ie. failing all data transfers.

Fixes: c943646d1f49 ("ARM: dts: pxa: add dma engine node to pxa3xx-nand")
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/pxa3xx.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/pxa3xx.dtsi b/arch/arm/boot/dts/pxa3xx.dtsi
index cf6998a0804d..564341af7e97 100644
--- a/arch/arm/boot/dts/pxa3xx.dtsi
+++ b/arch/arm/boot/dts/pxa3xx.dtsi
@@ -30,7 +30,7 @@ nand0: nand@43100000 {
 			reg = <0x43100000 90>;
 			interrupts = <45>;
 			clocks = <&clks CLK_NAND>;
-			dmas = <&pdma 97>;
+			dmas = <&pdma 97 3>;
 			dma-names = "data";
 			#address-cells = <1>;
 			#size-cells = <1>;	

From c565897ffe54ec0e36854db7fcfe88014e05ce41 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Mon, 22 Feb 2016 09:01:53 -0300
Subject: [PATCH 600/797] bus: imx-weim: Take the 'status' property value into
 account

commit 33b96d2c9579213cf3f36d7b29841b1e464750c4 upstream.

Currently we have an incorrect behaviour when multiple devices
are present under the weim node. For example:

&weim {
	...
	status = "okay";

	sram@0,0 {
		...
        	status = "okay";
	};

	mram@0,0 {
		...
        	status = "disabled";
    	};
};

In this case only the 'sram' device should be probed and not 'mram'.

However what happens currently is that the status variable is ignored,
causing the 'sram' device to be disabled and 'mram' to be enabled.

Change the weim_parse_dt() function to use
for_each_available_child_of_node() so that the devices marked with
'status = disabled' are not probed.

Suggested-by: Wolfgang Netbal <wolfgang.netbal@sigmatek.at>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/imx-weim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index e98d15eaa799..1827fc4d15c1 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -150,7 +150,7 @@ static int __init weim_parse_dt(struct platform_device *pdev,
 			return ret;
 	}
 
-	for_each_child_of_node(pdev->dev.of_node, child) {
+	for_each_available_child_of_node(pdev->dev.of_node, child) {
 		if (!child->name)
 			continue;
 

From 1b06e9942d51804170631351ada984947e87f042 Mon Sep 17 00:00:00 2001
From: Guo-Fu Tseng <cooldavid@cooldavid.org>
Date: Sat, 5 Mar 2016 08:11:55 +0800
Subject: [PATCH 601/797] jme: Do not enable NIC WoL functions on S0

commit 0772a99b818079e628a1da122ac7ee023faed83e upstream.

Otherwise it might be back on resume right after going to suspend in
some hardware.

Reported-by: Diego Viola <diego.viola@gmail.com>
Signed-off-by: Guo-Fu Tseng <cooldavid@cooldavid.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/jme.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 973dade2d07f..39da60007ade 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -270,11 +270,17 @@ jme_reset_mac_processor(struct jme_adapter *jme)
 }
 
 static inline void
-jme_clear_pm(struct jme_adapter *jme)
+jme_clear_pm_enable_wol(struct jme_adapter *jme)
 {
 	jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs);
 }
 
+static inline void
+jme_clear_pm_disable_wol(struct jme_adapter *jme)
+{
+	jwrite32(jme, JME_PMCS, PMCS_STMASK);
+}
+
 static int
 jme_reload_eeprom(struct jme_adapter *jme)
 {
@@ -1853,7 +1859,7 @@ jme_open(struct net_device *netdev)
 	struct jme_adapter *jme = netdev_priv(netdev);
 	int rc;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	JME_NAPI_ENABLE(jme);
 
 	tasklet_init(&jme->linkch_task, jme_link_change_tasklet,
@@ -1929,7 +1935,7 @@ jme_powersave_phy(struct jme_adapter *jme)
 		jme_set_100m_half(jme);
 		if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
 			jme_wait_link(jme);
-		jme_clear_pm(jme);
+		jme_clear_pm_enable_wol(jme);
 	} else {
 		jme_phy_off(jme);
 	}
@@ -2646,7 +2652,6 @@ jme_set_wol(struct net_device *netdev,
 	if (wol->wolopts & WAKE_MAGIC)
 		jme->reg_pmcs |= PMCS_MFEN;
 
-	jwrite32(jme, JME_PMCS, jme->reg_pmcs);
 	device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
 
 	return 0;
@@ -3172,7 +3177,7 @@ jme_init_one(struct pci_dev *pdev,
 	jme->mii_if.mdio_read = jme_mdio_read;
 	jme->mii_if.mdio_write = jme_mdio_write;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	device_set_wakeup_enable(&pdev->dev, true);
 
 	jme_set_phyfifo_5level(jme);
@@ -3304,7 +3309,7 @@ jme_resume(struct device *dev)
 	if (!netif_running(netdev))
 		return 0;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	jme_phy_on(jme);
 	if (test_bit(JME_FLAG_SSET, &jme->flags))
 		jme_set_settings(netdev, &jme->old_ecmd);

From e91b1dbdc1f064872a6a2bb2375ae9202dd5e6e0 Mon Sep 17 00:00:00 2001
From: Guo-Fu Tseng <cooldavid@cooldavid.org>
Date: Sat, 5 Mar 2016 08:11:56 +0800
Subject: [PATCH 602/797] jme: Fix device PM wakeup API usage

commit 81422e672f8181d7ad1ee6c60c723aac649f538f upstream.

According to Documentation/power/devices.txt, the driver should not call
device_set_wakeup_enable(); whether wakeup is enabled is a policy for the
user to decide.

Use device_init_wakeup() to initialize dev->power.should_wakeup and
dev->power.can_wakeup on driver initialization.

And use device_may_wakeup() on suspend to decide whether the WoL function
should be enabled on the NIC.

Reported-by: Diego Viola <diego.viola@gmail.com>
Signed-off-by: Guo-Fu Tseng <cooldavid@cooldavid.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/jme.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 39da60007ade..1257b18e6b90 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1931,7 +1931,7 @@ jme_wait_link(struct jme_adapter *jme)
 static void
 jme_powersave_phy(struct jme_adapter *jme)
 {
-	if (jme->reg_pmcs) {
+	if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) {
 		jme_set_100m_half(jme);
 		if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
 			jme_wait_link(jme);
@@ -2652,8 +2652,6 @@ jme_set_wol(struct net_device *netdev,
 	if (wol->wolopts & WAKE_MAGIC)
 		jme->reg_pmcs |= PMCS_MFEN;
 
-	device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
-
 	return 0;
 }
 
@@ -3178,7 +3176,7 @@ jme_init_one(struct pci_dev *pdev,
 	jme->mii_if.mdio_write = jme_mdio_write;
 
 	jme_clear_pm_disable_wol(jme);
-	device_set_wakeup_enable(&pdev->dev, true);
+	device_init_wakeup(&pdev->dev, true);
 
 	jme_set_phyfifo_5level(jme);
 	jme->pcirev = pdev->revision;

From 22327f609cef2a3f9bf0781fb2e9dda07ec64c98 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 14 Jan 2016 18:13:49 +0000
Subject: [PATCH 603/797] unbreak allmodconfig KCONFIG_ALLCONFIG=...

commit 6b87b70c5339f30e3c5b32085e69625906513dc2 upstream.

	Prior to 3.13 make allmodconfig KCONFIG_ALLCONFIG=/dev/null used
to be equivalent to make allmodconfig; these days it hardwires MODULES to n.
In fact, any KCONFIG_ALLCONFIG that doesn't set MODULES explicitly is
treated as if it set it to n.

	Regression had been introduced by commit cfa98f ("kconfig: do not
override symbols already set"); what happens is that conf_read_simple()
does sym_calc_value(modules_sym) on exit, which leaves SYMBOL_VALID set and
has conf_set_all_new_symbols() skip modules_sym.

	It's pretty easy to fix - simply move that call of sym_calc_value()
into the callers, except for the ones in KCONFIG_ALLCONFIG handling.
Objections?

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: cfa98f2e0ae9 ("kconfig: do not override symbols already set")
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/confdata.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 0b7dc2fd7bac..dd243d2abd87 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -267,10 +267,8 @@ int conf_read_simple(const char *name, int def)
 		if (in)
 			goto load;
 		sym_add_change_count(1);
-		if (!sym_defconfig_list) {
-			sym_calc_value(modules_sym);
+		if (!sym_defconfig_list)
 			return 1;
-		}
 
 		for_all_defaults(sym_defconfig_list, prop) {
 			if (expr_calc_value(prop->visible.expr) == no ||
@@ -403,7 +401,6 @@ int conf_read_simple(const char *name, int def)
 	}
 	free(line);
 	fclose(in);
-	sym_calc_value(modules_sym);
 	return 0;
 }
 
@@ -414,8 +411,12 @@ int conf_read(const char *name)
 
 	sym_set_change_count(0);
 
-	if (conf_read_simple(name, S_DEF_USER))
+	if (conf_read_simple(name, S_DEF_USER)) {
+		sym_calc_value(modules_sym);
 		return 1;
+	}
+
+	sym_calc_value(modules_sym);
 
 	for_all_symbols(i, sym) {
 		sym_calc_value(sym);
@@ -846,6 +847,7 @@ static int conf_split_config(void)
 
 	name = conf_get_autoconfig_name();
 	conf_read_simple(name, S_DEF_AUTO);
+	sym_calc_value(modules_sym);
 
 	if (chdir("include/config"))
 		return 1;

From 1f5c4e0cb83cde427f1b8b95aa9a2a42e249fd53 Mon Sep 17 00:00:00 2001
From: Caesar Wang <wxt@rock-chips.com>
Date: Mon, 15 Feb 2016 15:33:28 +0800
Subject: [PATCH 604/797] thermal: rockchip: fix a impossible condition caused
 by the warning

commit 43b4eb9fe719b107c8e5d49d1edbff0c135a42cb upstream.

As Dan reported, smatch emits the following warning for the thermal driver:
drivers/thermal/rockchip_thermal.c:551 rockchip_configure_from_dt()
warn: impossible condition '(thermal->tshut_temp > ((~0 >> 1))) =>
(s32min-s32max > s32max)'

Although the shut_temp read from DT is a u32, the temperature is currently
represented as int, not long, in the thermal driver.
Let's check shut_temp instead of thermal->tshut_temp in
the condition.

Fixes: commit 437df2172e8d
("thermal: rockchip: consistently use int for temperatures")

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Caesar Wang <wxt@rock-chips.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thermal/rockchip_thermal.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index e845841ab036..7106288efae3 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -545,15 +545,14 @@ static int rockchip_configure_from_dt(struct device *dev,
 			 thermal->chip->tshut_temp);
 		thermal->tshut_temp = thermal->chip->tshut_temp;
 	} else {
+		if (shut_temp > INT_MAX) {
+			dev_err(dev, "Invalid tshut temperature specified: %d\n",
+				shut_temp);
+			return -ERANGE;
+		}
 		thermal->tshut_temp = shut_temp;
 	}
 
-	if (thermal->tshut_temp > INT_MAX) {
-		dev_err(dev, "Invalid tshut temperature specified: %d\n",
-			thermal->tshut_temp);
-		return -ERANGE;
-	}
-
 	if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) {
 		dev_warn(dev,
 			 "Missing tshut mode property, using default (%s)\n",

From 03d86237007729b006808e8eab90e96a565deee4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 4 Mar 2016 17:20:13 +1100
Subject: [PATCH 605/797] sunrpc/cache: drop reference when
 sunrpc_cache_pipe_upcall() detects a race

commit a6ab1e8126d205238defbb55d23661a3a5c6a0d8 upstream.

sunrpc_cache_pipe_upcall() can detect a race if CACHE_PENDING is no longer
set.  In this case it aborts the queuing of the upcall.
However it has already taken a new counted reference on "h" and
doesn't "put" it, even though it frees the data structure holding the reference.

So let's delay the "cache_get" until we know we need it.

Fixes: f9e1aedc6c79 ("sunrpc/cache: remove races with queuing an upcall.")
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sunrpc/cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 21e20353178e..63fb5ee212cf 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 	}
 
 	crq->q.reader = 0;
-	crq->item = cache_get(h);
 	crq->buf = buf;
 	crq->len = 0;
 	crq->readers = 0;
 	spin_lock(&queue_lock);
-	if (test_bit(CACHE_PENDING, &h->flags))
+	if (test_bit(CACHE_PENDING, &h->flags)) {
+		crq->item = cache_get(h);
 		list_add_tail(&crq->q.list, &detail->queue);
-	else
+	} else
 		/* Lost a race, no longer PENDING, so don't enqueue */
 		ret = -EAGAIN;
 	spin_unlock(&queue_lock);

From 5b6e810f352b00c7bf5e7e32557a39b6d550458a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:29:45 +0100
Subject: [PATCH 606/797] megaraid_sas: add missing curly braces in ioctl
 handler

commit 3deb9438d34a09f6796639b652a01d110aca9f75 upstream.

gcc-6 found a dubious indentation in the megasas_mgmt_fw_ioctl
function:

drivers/scsi/megaraid/megaraid_sas_base.c: In function 'megasas_mgmt_fw_ioctl':
drivers/scsi/megaraid/megaraid_sas_base.c:6658:4: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation]
    kbuff_arr[i] = NULL;
    ^~~~~~~~~
drivers/scsi/megaraid/megaraid_sas_base.c:6653:3: note: ...this 'if' clause, but it is not
   if (kbuff_arr[i])
   ^~

The code is actually correct, as there is no downside in clearing a NULL
pointer again.

This clarifies the code and avoids the warning by adding extra curly
braces.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 90dc9d98f01b ("megaraid_sas : MFI MPT linked list corruption fix")
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Sumit Saxena <sumit.saxena@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 97a1c1c33b05..00ce3e269a43 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -6282,12 +6282,13 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
 	}
 
 	for (i = 0; i < ioc->sge_count; i++) {
-		if (kbuff_arr[i])
+		if (kbuff_arr[i]) {
 			dma_free_coherent(&instance->pdev->dev,
 					  le32_to_cpu(kern_sge32[i].length),
 					  kbuff_arr[i],
 					  le32_to_cpu(kern_sge32[i].phys_addr));
 			kbuff_arr[i] = NULL;
+		}
 	}
 
 	megasas_return_cmd(instance, cmd);

From f4b1d0a9a3f4291ba4ab48dd27efd01d3775d7f6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 22 Dec 2015 17:25:17 +0200
Subject: [PATCH 607/797] stm class: Select CONFIG_SRCU

commit 042d4460b5b4379a12f375045ff9065cf6758735 upstream.

The newly added STM code uses SRCU, but does not ensure that
this code is part of the kernel:

drivers/built-in.o: In function `stm_source_link_show':
include/linux/srcu.h:221: undefined reference to `__srcu_read_lock'
include/linux/srcu.h:238: undefined reference to `__srcu_read_unlock'
drivers/built-in.o: In function `stm_source_link_drop':
include/linux/srcu.h:221: undefined reference to `__srcu_read_lock'
include/linux/srcu.h:238: undefined reference to `__srcu_read_unlock'

This adds a Kconfig 'select' statement like all the other SRCU using
drivers have.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 7bd1d4093c2f ("stm class: Introduce an abstraction for System Trace Module devices")
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/stm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig
index 83e9f591a54b..e7a348807f0c 100644
--- a/drivers/hwtracing/stm/Kconfig
+++ b/drivers/hwtracing/stm/Kconfig
@@ -1,6 +1,7 @@
 config STM
 	tristate "System Trace Module devices"
 	select CONFIGFS_FS
+	select SRCU
 	help
 	  A System Trace Module (STM) is a device exporting data in System
 	  Trace Protocol (STP) format as defined by MIPI STP standards.

From b393b9da446626170a39bcd79c52e8ebadb19c8c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 4 Feb 2016 14:36:09 +0300
Subject: [PATCH 608/797] extcon: max77843: Use correct size for reading the
 interrupt register

commit c4924e92442d7218bd725e47fa3988c73aae84c9 upstream.

The info->status[] array has 3 elements.  We are using size
MAX77843_MUIC_IRQ_NUM (16) instead of MAX77843_MUIC_STATUS_NUM (3) as
intended.

Fixes: 135d9f7d135a ('extcon: max77843: Clear IRQ bits state before request IRQ')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Jaewon Kim <jaewon02.kim@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
[cw00.choi: Modify the patch title]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/extcon/extcon-max77843.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c
index 9f9ea334399c..b6cb30d207be 100644
--- a/drivers/extcon/extcon-max77843.c
+++ b/drivers/extcon/extcon-max77843.c
@@ -803,7 +803,7 @@ static int max77843_muic_probe(struct platform_device *pdev)
 	/* Clear IRQ bits before request IRQs */
 	ret = regmap_bulk_read(max77843->regmap_muic,
 			MAX77843_MUIC_REG_INT1, info->status,
-			MAX77843_MUIC_IRQ_NUM);
+			MAX77843_MUIC_STATUS_NUM);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to Clear IRQ bits\n");
 		goto err_muic_irq;

From 1a1a512b983108015ced1e7a7c7775cfeec42d8c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 4 May 2016 14:50:15 -0700
Subject: [PATCH 609/797] Linux 4.4.9

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1928fcd539cc..0722cdf52152 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 8
+SUBLEVEL = 9
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From cc798dcca00a8b71e5c09a38d3921461f931c85f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 23 Nov 2015 13:26:19 +0000
Subject: [PATCH 610/797] arm64: mm: detect bad __create_mapping uses

If a caller of __create_mapping provides a PA and VA which have
different sub-page offsets, it is not clear which offset they expect to
apply to the mapping, and is indicative of a bad caller.

In some cases, the region we wish to map may validly have a sub-page
offset in the physical and virtual addresses. For example, EFI runtime
regions have 4K granularity, yet may be mapped by a 64K page kernel. So
long as the physical and virtual offsets are the same, the region will
be mapped at the expected VAs.

Disallow calls with differing sub-page offsets, and WARN when they are
encountered, so that we can detect and fix such cases.

Cc: Laura Abbott <labbott@fedoraproject.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit cc5d2b3b95cdbb3fed4e38e667d17b9ac7250f7a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 116ad654dd59..61a82d68330d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -251,6 +251,13 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 {
 	unsigned long addr, length, end, next;
 
+	/*
+	 * If the virtual and physical address don't have the same offset
+	 * within a page, we cannot map the region as the caller expects.
+	 */
+	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
+		return;
+
 	addr = virt & PAGE_MASK;
 	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
 

From 6329e5d3b74540578dbbf4550fdc53c52c706c94 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 23 Nov 2015 13:26:20 +0000
Subject: [PATCH 611/797] arm64: mm: allow sections for unaligned bases

Callees of __create_mapping may decide to create section mappings if
sufficient low bits of the physical and virtual addresses they were
passed are zero. While __create_mapping rounds the virtual base address
down, it does not similarly round the physical base address down, and
hence non-zero bits in the physical address can prevent use of a section
mapping, even where a whole next-level table would be used instead.

Round down the physical base address in __create_mapping to enable all
callees to always create section mappings when such a mapping is
possible.

Cc: Laura Abbott <labbott@fedoraproject.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 9c4e08a3022b6df90d31ef4007291faabfce5431)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 61a82d68330d..d2a6194f4bec 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -258,6 +258,7 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
 		return;
 
+	phys &= PAGE_MASK;
 	addr = virt & PAGE_MASK;
 	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
 

From 6e8ef09edf4a9e61a04fad753ffeebaecc60b568 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 30 Oct 2015 18:56:19 +0000
Subject: [PATCH 612/797] arm64: pgtable: implement pte_accessible()

This patch implements the pte_accessible() macro, which can be used to
test whether or not a given pte is a candidate for allocation in the
TLB.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 76c714be0e5e60c935a53b31be58939510ba1d0f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c63868ae9a4a..cd5dfc97268e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -168,6 +168,16 @@ extern struct page *empty_zero_page;
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+#define pte_valid_young(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+
+/*
+ * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
+ * so that we don't erroneously return false for pages that have been
+ * remapped as PROT_NONE but are yet to be flushed from the TLB.
+ */
+#define pte_accessible(mm, pte)	\
+	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
 
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {

From 1a9cc42c0a812241f6cd679a19aefba2900437a7 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Fri, 20 Nov 2015 17:59:10 +0800
Subject: [PATCH 613/797] arm64: add __init/__initdata section marker to some
 functions/variables

These functions/variables are not needed after booting, so mark them
as __init or __initdata.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit a7c61a3452d39078919f0e1f493ff966fb64f0db)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/armv8_deprecated.c | 6 +++---
 arch/arm64/kernel/cpufeature.c       | 9 +++++----
 arch/arm64/kernel/fpsimd.c           | 2 +-
 arch/arm64/mm/dma-mapping.c          | 4 ++--
 arch/arm64/mm/init.c                 | 6 +++---
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 937f5e58a4d3..3e01207917b1 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -62,7 +62,7 @@ struct insn_emulation {
 };
 
 static LIST_HEAD(insn_emulation);
-static int nr_insn_emulated;
+static int nr_insn_emulated __initdata;
 static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
 
 static void register_emulation_hooks(struct insn_emulation_ops *ops)
@@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn,
 	return ret;
 }
 
-static void register_insn_emulation(struct insn_emulation_ops *ops)
+static void __init register_insn_emulation(struct insn_emulation_ops *ops)
 {
 	unsigned long flags;
 	struct insn_emulation *insn;
@@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = {
 	{ }
 };
 
-static void register_insn_emulation_sysctl(struct ctl_table *table)
+static void __init register_insn_emulation_sysctl(struct ctl_table *table)
 {
 	unsigned long flags;
 	int i = 0;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0669c63281ea..5c90aa490a2b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -684,7 +684,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
 	{},
 };
 
-static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
 {
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
@@ -729,7 +729,7 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *
 	return rc;
 }
 
-static void setup_cpu_hwcaps(void)
+static void __init setup_cpu_hwcaps(void)
 {
 	int i;
 	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
@@ -758,7 +758,8 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  * Run through the enabled capabilities and enable() it on all active
  * CPUs
  */
-static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+static void __init
+enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
@@ -897,7 +898,7 @@ static inline void set_sys_caps_initialised(void)
 
 #endif	/* CONFIG_HOTPLUG_CPU */
 
-static void setup_feature_capabilities(void)
+static void __init setup_feature_capabilities(void)
 {
 	update_cpu_capabilities(arm64_features, "detected feature:");
 	enable_cpu_capabilities(arm64_features);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4c46c54a3ad7..acc1afd5c749 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = {
 	.notifier_call = fpsimd_cpu_pm_notifier,
 };
 
-static void fpsimd_pm_init(void)
+static void __init fpsimd_pm_init(void)
 {
 	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
 }
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 354144e33218..a6e757cbab77 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -40,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 static struct gen_pool *atomic_pool;
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
-static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
 
 static int __init early_coherent_pool(char *p)
 {
@@ -896,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb,
 	return 0;
 }
 
-static int register_iommu_dma_ops_notifier(struct bus_type *bus)
+static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
 {
 	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
 	int ret;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 4cb98aa8c27b..10fab52eed95 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -71,7 +71,7 @@ early_param("initrd", early_initrd);
  * currently assumes that for memory starting above 4G, 32-bit devices will
  * use a DMA offset.
  */
-static phys_addr_t max_zone_dma_phys(void)
+static phys_addr_t __init max_zone_dma_phys(void)
 {
 	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
 	return min(offset + (1ULL << 32), memblock_end_of_DRAM());
@@ -126,11 +126,11 @@ EXPORT_SYMBOL(pfn_valid);
 #endif
 
 #ifndef CONFIG_SPARSEMEM
-static void arm64_memory_present(void)
+static void __init arm64_memory_present(void)
 {
 }
 #else
-static void arm64_memory_present(void)
+static void __init arm64_memory_present(void)
 {
 	struct memblock_region *reg;
 

From 2e310797997214e0cc606013ac308167b6b72dc0 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@caviumnetworks.com>
Date: Wed, 2 Dec 2015 14:00:10 +0000
Subject: [PATCH 614/797] arm64: fix COMPAT_SHMLBA definition for large pages

ARM glibc uses (4 * __getpagesize()) for SHMLBA, which is correct for
4KB pages and works fine for 64KB pages, but the kernel uses a hardcoded
16KB that is too small for 64KB page based kernels. This changes the
definition to what user space sees when using 64KB pages.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit b9b7aebb42d1b1392f3111de61136bb6cf3aae3f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/shmparam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
index 4df608a8459e..e368a55ebd22 100644
--- a/arch/arm64/include/asm/shmparam.h
+++ b/arch/arm64/include/asm/shmparam.h
@@ -21,7 +21,7 @@
  * alignment value. Since we don't have aliasing D-caches, the rest of
  * the time we can safely use PAGE_SIZE.
  */
-#define COMPAT_SHMLBA	0x4000
+#define COMPAT_SHMLBA	(4 * PAGE_SIZE)
 
 #include <asm-generic/shmparam.h>
 

From 4ea9dd702768f3d1c3ab346f30fdce8b8d1a8ef9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 23 Nov 2015 15:12:59 +0000
Subject: [PATCH 615/797] arm64: enable HAVE_IRQ_TIME_ACCOUNTING

arm64 relies on the arm_arch_timer for sched_clock, so we can select
HAVE_IRQ_TIME_ACCOUNTING and have the core sched-clock code enable the
feature at runtime based on the rate.

Reported-by: Mario Smarduch <m.smarduch@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 24da208db32ee1e4757ceaba898c47add8e5361e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/features/time/irq-time-acct/arch-support.txt | 2 +-
 arch/arm64/Kconfig                                         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index e63316239938..4199ffecc0ff 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: |  ..  |
     |         arc: | TODO |
     |         arm: |  ok  |
-    |       arm64: |  ..  |
+    |       arm64: |  ok  |
     |       avr32: | TODO |
     |    blackfin: | TODO |
     |         c6x: | TODO |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 871f21783866..4876459c0838 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -70,6 +70,7 @@ config ARM64
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
+	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_MEMBLOCK
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS

From 0348dff2c49b25c7b5702ef887f03177f4b0c0fd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 19 Nov 2015 17:48:31 +0000
Subject: [PATCH 616/797] arm64: spinlock: serialise spin_unlock_wait against
 concurrent lockers

Boqun Feng reported a rather nasty ordering issue with spin_unlock_wait
on architectures implementing spin_lock with LL/SC sequences and acquire
semantics:

 | CPU 1                   CPU 2                     CPU 3
 | ==================      ====================      ==============
 |                                                   spin_unlock(&lock);
 |                         spin_lock(&lock):
 |                           r1 = *lock; // r1 == 0;
 |                         o = READ_ONCE(object); // reordered here
 | object = NULL;
 | smp_mb();
 | spin_unlock_wait(&lock);
 |                           *lock = 1;
 | smp_mb();
 | o->dead = true;
 |                         if (o) // true
 |                           BUG_ON(o->dead); // true!!

The crux of the problem is that spin_unlock_wait(&lock) can return on
CPU 1 whilst CPU 2 is in the process of taking the lock. This can be
resolved by upgrading spin_unlock_wait to a LOCK operation, forcing it
to serialise against a concurrent locker and giving it acquire semantics
in the process (although it is not at all clear whether this is needed -
different callers seem to assume different things about the barrier
semantics and architectures are similarly disjoint in their
implementations of the macro).

This patch implements spin_unlock_wait using an LL/SC sequence with
acquire semantics on arm64. For v8.1 systems with the LSE atomics, the
exclusive writeback is omitted, since the spin_lock operation is
indivisible and no intermediate state can be observed.

Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit d86b8da04dfa4771a68bdbad6c424d40f22f0d14)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/spinlock.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index c85e96d174a5..fc9682bfe002 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -26,9 +26,28 @@
  * The memory barriers are implicit with the load-acquire and store-release
  * instructions.
  */
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+	asm volatile(
+"	sevl\n"
+"1:	wfe\n"
+"2:	ldaxr	%w0, %2\n"
+"	eor	%w1, %w0, %w0, ror #16\n"
+"	cbnz	%w1, 1b\n"
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+"	stxr	%w1, %w0, %2\n"
+"	cbnz	%w1, 2b\n", /* Serialise against any concurrent lockers */
+	/* LSE atomics */
+"	nop\n"
+"	nop\n")
+	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
+	:
+	: "memory");
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 

From 2ef8b1f56c1989157c8ef929f4f89bd6a3ac7950 Mon Sep 17 00:00:00 2001
From: Li Bin <huawei.libin@huawei.com>
Date: Fri, 4 Dec 2015 11:38:39 +0800
Subject: [PATCH 617/797] arm64: ftrace: stop using kstop_machine to
 enable/disable tracing

For ftrace on arm64, kstop_machine, which is hugely disruptive to a
running system, is not needed to convert NOPs to ftrace calls or back.
The instructions being modified (NOP, B and BL) are all safe for what
is called "concurrent modification and execution of instructions":
they can be executed by one thread of execution while they are being
modified by another thread of execution, without requiring explicit
synchronization.

Signed-off-by: Li Bin <huawei.libin@huawei.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 81a6a146e88eca5d6726569779778d61489d85aa)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/ftrace.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index c851be795080..9669b331a23b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -93,6 +93,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	return ftrace_modify_code(pc, old, new, true);
 }
 
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
 int __init ftrace_dyn_arch_init(void)
 {
 	return 0;

From 42c1d121864549d11fb0f002df8dc8ef35219107 Mon Sep 17 00:00:00 2001
From: Li Bin <huawei.libin@huawei.com>
Date: Fri, 4 Dec 2015 11:38:40 +0800
Subject: [PATCH 618/797] arm64: ftrace: fix the comments for
 ftrace_modify_code

There is no need to worry about module or __init text disappearing.
ftrace has a module notifier that is called when a module is being
unloaded, before its text goes away. This code grabs the ftrace_lock
mutex and removes the module's functions from the ftrace list, so that
ftrace will no longer make any modifications to that module's text.
The update that makes functions traced or not is done under the
ftrace_lock mutex as well. As for __init sections, their code should
not be modified by ftrace at all, because it is blacklisted in
recordmcount.c and ignored by ftrace.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Li Bin <huawei.libin@huawei.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 004ab584e028093996cf5b8e220b8bc50c5111cf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/ftrace.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 9669b331a23b..8f7005bc35bd 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
 
 	/*
 	 * Note:
-	 * Due to modules and __init, code can disappear and change,
-	 * we need to protect against faulting as well as code changing.
-	 * We do this by aarch64_insn_*() which use the probe_kernel_*().
-	 *
-	 * No lock is held here because all the modifications are run
-	 * through stop_machine().
+	 * We are paranoid about modifying text, as if a bug were to happen, it
+	 * could cause us to read or write to someplace that could cause harm.
+	 * Carefully read and modify the code with aarch64_insn_*() which uses
+	 * probe_kernel_*(), and make sure what we read is what we expected it
+	 * to be before modifying it.
 	 */
 	if (validate) {
 		if (aarch64_insn_read((void *)pc, &replaced))

From a5b499e62f4070c7bfe2d322516a937e6f48d04f Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 4 Dec 2015 12:42:29 +0000
Subject: [PATCH 619/797] arm64: Add trace_hardirqs_off annotation in
 ret_to_user

When a kernel is built with CONFIG_TRACE_IRQFLAGS the following warning
is produced when entering userspace for the first time:

  WARNING: at /work/Linux/linux-2.6-aarch64/kernel/locking/lockdep.c:3519
  Modules linked in:
  CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc3+ #639
  Hardware name: Juno (DT)
  task: ffffffc9768a0000 ti: ffffffc9768a8000 task.ti: ffffffc9768a8000
  PC is at check_flags.part.22+0x19c/0x1a8
  LR is at check_flags.part.22+0x19c/0x1a8
  pc : [<ffffffc0000fba6c>] lr : [<ffffffc0000fba6c>] pstate: 600001c5
  sp : ffffffc9768abe10
  x29: ffffffc9768abe10 x28: ffffffc9768a8000
  x27: 0000000000000000 x26: 0000000000000001
  x25: 00000000000000a6 x24: ffffffc00064be6c
  x23: ffffffc0009f249e x22: ffffffc9768a0000
  x21: ffffffc97fea5480 x20: 00000000000001c0
  x19: ffffffc00169a000 x18: 0000005558cc7b58
  x17: 0000007fb78e3180 x16: 0000005558d2e238
  x15: ffffffffffffffff x14: 0ffffffffffffffd
  x13: 0000000000000008 x12: 0101010101010101
  x11: 7f7f7f7f7f7f7f7f x10: fefefefefefeff63
  x9 : 7f7f7f7f7f7f7f7f x8 : 6e655f7371726964
  x7 : 0000000000000001 x6 : ffffffc0001079c4
  x5 : 0000000000000000 x4 : 0000000000000001
  x3 : ffffffc001698438 x2 : 0000000000000000
  x1 : ffffffc9768a0000 x0 : 000000000000002e
  Call trace:
  [<ffffffc0000fba6c>] check_flags.part.22+0x19c/0x1a8
  [<ffffffc0000fc440>] lock_is_held+0x80/0x98
  [<ffffffc00064bafc>] __schedule+0x404/0x730
  [<ffffffc00064be6c>] schedule+0x44/0xb8
  [<ffffffc000085bb0>] ret_to_user+0x0/0x24
  possible reason: unannotated irqs-off.
  irq event stamp: 502169
  hardirqs last  enabled at (502169): [<ffffffc000085a98>] el0_irq_naked+0x1c/0x24
  hardirqs last disabled at (502167): [<ffffffc0000bb3bc>] __do_softirq+0x17c/0x298
  softirqs last  enabled at (502168): [<ffffffc0000bb43c>] __do_softirq+0x1fc/0x298
  softirqs last disabled at (502143): [<ffffffc0000bb830>] irq_exit+0xa0/0xf0

This happens because we disable interrupts in ret_to_user before calling
schedule() in work_resched. This patch adds the necessary
trace_hardirqs_off annotation.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit db3899a6477a4dccd26cbfb7f408b6be2cc068e0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/entry.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7ed3d75f6304..e5b25389c48f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -634,6 +634,9 @@ work_pending:
 	bl	do_notify_resume
 	b	ret_to_user
 work_resched:
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off		// the IRQs are off here, inform the tracing code
+#endif
 	bl	schedule
 
 /*

From c0f49bdceea7d2f0cfe1c6f857e4e1cfa919382a Mon Sep 17 00:00:00 2001
From: Jungseok Lee <jungseoklee85@gmail.com>
Date: Fri, 4 Dec 2015 11:02:25 +0000
Subject: [PATCH 620/797] arm64: Store struct thread_info in sp_el0

A way to manage struct thread_info data is needed once an IRQ stack is
introduced. Under the current thread_info calculation logic, the struct
thread_info information would have to be copied to the IRQ stack
whenever a context switch is invoked. That is too expensive for the
approach to remain supportable.

Instead, this patch pays attention to sp_el0 which is an unused scratch
register in EL1 context. sp_el0 utilization not only simplifies the
management, but also prevents text section size from being increased
largely due to static allocated IRQ stack as removing masking operation
using THREAD_SIZE in many places.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 6cdf9c7ca687e01840d0215437620a20263012fc)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/thread_info.h | 10 ++++++++--
 arch/arm64/kernel/entry.S            | 15 ++++++++++++---
 arch/arm64/kernel/head.S             |  5 +++++
 arch/arm64/kernel/sleep.S            |  3 +++
 4 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 90c7ff233735..abd64bd1f6d9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -73,10 +73,16 @@ register unsigned long current_stack_pointer asm ("sp");
  */
 static inline struct thread_info *current_thread_info(void) __attribute_const__;
 
+/*
+ * struct thread_info can be accessed directly via sp_el0.
+ */
 static inline struct thread_info *current_thread_info(void)
 {
-	return (struct thread_info *)
-		(current_stack_pointer & ~(THREAD_SIZE - 1));
+	unsigned long sp_el0;
+
+	asm ("mrs %0, sp_el0" : "=r" (sp_el0));
+
+	return (struct thread_info *)sp_el0;
 }
 
 #define thread_saved_pc(tsk)	\
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e5b25389c48f..245fa6837880 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -88,7 +88,8 @@
 
 	.if	\el == 0
 	mrs	x21, sp_el0
-	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
+	mov	tsk, sp
+	and	tsk, tsk, #~(THREAD_SIZE - 1)	// Ensure MDSCR_EL1.SS is clear,
 	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
 	.else
@@ -107,6 +108,13 @@
 	str	x21, [sp, #S_SYSCALLNO]
 	.endif
 
+	/*
+	 * Set sp_el0 to current thread_info.
+	 */
+	.if	\el == 0
+	msr	sp_el0, tsk
+	.endif
+
 	/*
 	 * Registers that may be useful after this macro is invoked:
 	 *
@@ -164,8 +172,7 @@ alternative_endif
 	.endm
 
 	.macro	get_thread_info, rd
-	mov	\rd, sp
-	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
+	mrs	\rd, sp_el0
 	.endm
 
 /*
@@ -599,6 +606,8 @@ ENTRY(cpu_switch_to)
 	ldp	x29, x9, [x8], #16
 	ldr	lr, [x8]
 	mov	sp, x9
+	and	x9, x9, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x9
 	ret
 ENDPROC(cpu_switch_to)
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b685257926f0..17ce7285bb12 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -424,6 +424,9 @@ __mmap_switched:
 	b	1b
 2:
 	adr_l	sp, initial_sp, x4
+	mov	x4, sp
+	and	x4, x4, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x4			// Save thread_info
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
 	mov	x29, #0
@@ -611,6 +614,8 @@ ENDPROC(secondary_startup)
 ENTRY(__secondary_switched)
 	ldr	x0, [x21]			// get secondary_data.stack
 	mov	sp, x0
+	and	x0, x0, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x0			// save thread_info
 	mov	x29, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index f586f7c875e2..e33fe33876ab 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -173,6 +173,9 @@ ENTRY(cpu_resume)
 	/* load physical address of identity map page table in x1 */
 	adrp	x1, idmap_pg_dir
 	mov	sp, x2
+	/* save thread_info */
+	and	x2, x2, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x2
 	/*
 	 * cpu_do_resume expects x0 to contain context physical address
 	 * pointer and x1 to contain physical address of 1:1 page tables

From 2f478c1abcc07611671c5fee4bd10974dde476ab Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Fri, 4 Dec 2015 11:02:26 +0000
Subject: [PATCH 621/797] arm64: Modify stack trace and dump for use with
 irq_stack

This patch allows unwind_frame() to traverse from interrupt stack to task
stack correctly. It requires data from a dummy stack frame, created
during irq_stack_entry(), added by a later patch.

A similar approach is taken to modify dump_backtrace(), which expects to
find struct pt_regs underneath any call to functions marked __exception.
When on an irq_stack, the struct pt_regs is stored on the old task stack,
the location of which is stored in the dummy stack frame.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
[james.morse: merged two patches, reworked for per_cpu irq_stacks, and
 no alignment guarantees, added irq_stack definitions]
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit 132cd887b5c54758d04bf25c52fa48f45e843a30)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h   | 32 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/irq.c        |  3 +++
 arch/arm64/kernel/stacktrace.c | 29 +++++++++++++++++++++++++++--
 arch/arm64/kernel/traps.c      | 14 +++++++++++++-
 4 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 8e8d30684392..e2f3f135a3bc 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,10 +1,32 @@
 #ifndef __ASM_IRQ_H
 #define __ASM_IRQ_H
 
+#define IRQ_STACK_SIZE			THREAD_SIZE
+#define IRQ_STACK_START_SP		THREAD_START_SP
+
+#ifndef __ASSEMBLER__
+
+#include <linux/percpu.h>
+
 #include <asm-generic/irq.h>
+#include <asm/thread_info.h>
 
 struct pt_regs;
 
+DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
+
+/*
+ * The highest address on the stack, and the first to be used. Used to
+ * find the dummy-stack frame put down by el?_irq() in entry.S.
+ */
+#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
+
+/*
+ * The offset from irq_stack_ptr where entry.S will store the original
+ * stack pointer. Used by unwind_frame() and dump_backtrace().
+ */
+#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x10));
+
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
 static inline int nr_legacy_irqs(void)
@@ -12,4 +34,14 @@ static inline int nr_legacy_irqs(void)
 	return 0;
 }
 
+static inline bool on_irq_stack(unsigned long sp, int cpu)
+{
+	/* variable names the same as kernel/stacktrace.c */
+	unsigned long low = (unsigned long)per_cpu(irq_stack, cpu);
+	unsigned long high = low + IRQ_STACK_START_SP;
+
+	return (low <= sp && sp <= high);
+}
+
+#endif /* !__ASSEMBLER__ */
 #endif
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 9f17ec071ee0..1e3cef578e21 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -30,6 +30,9 @@
 
 unsigned long irq_err_count;
 
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned */
+DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
+
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_list(p, prec);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index ccb6078ed9f2..b947eeffa5b2 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 
+#include <asm/irq.h>
 #include <asm/stacktrace.h>
 
 /*
@@ -39,17 +40,41 @@ int notrace unwind_frame(struct stackframe *frame)
 {
 	unsigned long high, low;
 	unsigned long fp = frame->fp;
+	unsigned long irq_stack_ptr;
+
+	/*
+	 * Use raw_smp_processor_id() to avoid false-positives from
+	 * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
+	 * task stacks, we can be pre-empted in this case, so
+	 * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
+	 * tasks can't ever be on an interrupt stack, so regardless of cpu,
+	 * the checks will always fail.
+	 */
+	irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
 
 	low  = frame->sp;
-	high = ALIGN(low, THREAD_SIZE);
+	/* irq stacks are not THREAD_SIZE aligned */
+	if (on_irq_stack(frame->sp, raw_smp_processor_id()))
+		high = irq_stack_ptr;
+	else
+		high = ALIGN(low, THREAD_SIZE) - 0x20;
 
-	if (fp < low || fp > high - 0x18 || fp & 0xf)
+	if (fp < low || fp > high || fp & 0xf)
 		return -EINVAL;
 
 	frame->sp = fp + 0x10;
 	frame->fp = *(unsigned long *)(fp);
 	frame->pc = *(unsigned long *)(fp + 8);
 
+	/*
+	 * Check whether we are going to walk through from interrupt stack
+	 * to task stack.
+	 * If we reach the end of the stack - and its an interrupt stack,
+	 * read the original task stack pointer from the dummy frame.
+	 */
+	if (frame->sp == irq_stack_ptr)
+		frame->sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index e9b9b5364393..8a0084541f84 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -146,6 +146,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
+	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
@@ -180,9 +181,20 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		if (ret < 0)
 			break;
 		stack = frame.sp;
-		if (in_exception_text(where))
+		if (in_exception_text(where)) {
+			/*
+			 * If we switched to the irq_stack before calling this
+			 * exception handler, then the pt_regs will be on the
+			 * task stack. The easiest way to tell is if the large
+			 * pt_regs would overlap with the end of the irq_stack.
+			 */
+			if (stack < irq_stack_ptr &&
+			    (stack + sizeof(struct pt_regs)) > irq_stack_ptr)
+				stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
 			dump_mem("", "Exception stack", stack,
 				 stack + sizeof(struct pt_regs), false);
+		}
 	}
 }
 

From ea288f7a80b63d6956d23f50dd04fa70f8e7368f Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 4 Dec 2015 11:02:27 +0000
Subject: [PATCH 622/797] arm64: Add do_softirq_own_stack() and enable
 irq_stacks

entry.S is modified to switch to the per_cpu irq_stack during el{0,1}_irq.
irq_count is used to detect recursive interrupts on the irq_stack, it is
updated late by do_softirq_own_stack(), when called on the irq_stack, before
__do_softirq() re-enables interrupts to process softirqs.

do_softirq_own_stack() is added by this patch, but does not yet switch
stack.

This patch adds the dummy stack frame and data needed by the previous
stack tracing patches.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 8e23dacd12a48e58125b84c817da50850b73280a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h |  2 ++
 arch/arm64/kernel/entry.S    | 42 ++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/irq.c      | 38 +++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index e2f3f135a3bc..fa2a8d0e4792 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -11,6 +11,8 @@
 #include <asm-generic/irq.h>
 #include <asm/thread_info.h>
 
+#define __ARCH_HAS_DO_SOFTIRQ
+
 struct pt_regs;
 
 DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 245fa6837880..8f7e737949fe 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -27,6 +27,7 @@
 #include <asm/cpufeature.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
+#include <asm/irq.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 
@@ -175,6 +176,42 @@ alternative_endif
 	mrs	\rd, sp_el0
 	.endm
 
+	.macro	irq_stack_entry, dummy_lr
+	mov	x19, sp			// preserve the original sp
+
+	adr_l	x25, irq_stack
+	mrs	x26, tpidr_el1
+	add	x25, x25, x26
+
+	/*
+	 * Check the lowest address on irq_stack for the irq_count value,
+	 * incremented by do_softirq_own_stack if we have re-enabled irqs
+	 * while on the irq_stack.
+	 */
+	ldr	x26, [x25]
+	cbnz	x26, 9998f		// recursive use?
+
+	/* switch to the irq stack */
+	mov	x26, #IRQ_STACK_START_SP
+	add	x26, x25, x26
+	mov	sp, x26
+
+	/* Add a dummy stack frame */
+	stp     x29, \dummy_lr, [sp, #-16]!           // dummy stack frame
+	mov	x29, sp
+	stp     xzr, x19, [sp, #-16]!
+
+9998:
+	.endm
+
+	/*
+	 * x19 should be preserved between irq_stack_entry and
+	 * irq_stack_exit.
+	 */
+	.macro	irq_stack_exit
+	mov	sp, x19
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - x0 to x6.
@@ -190,10 +227,11 @@ tsk	.req	x28		// current thread_info
  * Interrupt handling.
  */
 	.macro	irq_handler
-	adrp	x1, handle_arch_irq
-	ldr	x1, [x1, #:lo12:handle_arch_irq]
+	ldr_l	x1, handle_arch_irq
 	mov	x0, sp
+	irq_stack_entry x22
 	blr	x1
+	irq_stack_exit
 	.endm
 
 	.text
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 1e3cef578e21..ff7ebb710e51 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -25,14 +25,24 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
 
 unsigned long irq_err_count;
 
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned */
+/*
+ * irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned.
+ * irq_stack[0] is used as irq_count, a non-zero value indicates the stack
+ * is in use, and el?_irq() shouldn't switch to it. This is used to detect
+ * recursive use of the irq_stack, it is lazily updated by
+ * do_softirq_own_stack(), which is called on the irq_stack, before
+ * re-enabling interrupts to process softirqs.
+ */
 DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
 
+#define IRQ_COUNT()	(*per_cpu(irq_stack, smp_processor_id()))
+
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_list(p, prec);
@@ -56,3 +66,29 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
+
+/*
+ * do_softirq_own_stack() is called from irq_exit() before __do_softirq()
+ * re-enables interrupts, at which point we may re-enter el?_irq(). We
+ * increase irq_count here so that el1_irq() knows that it is already on the
+ * irq stack.
+ *
+ * Called with interrupts disabled, so we don't worry about moving cpu, or
+ * being interrupted while modifying irq_count.
+ *
+ * This function doesn't actually switch stack.
+ */
+void do_softirq_own_stack(void)
+{
+	int cpu = smp_processor_id();
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (on_irq_stack(current_stack_pointer, cpu)) {
+		IRQ_COUNT()++;
+		__do_softirq();
+		IRQ_COUNT()--;
+	} else {
+		__do_softirq();
+	}
+}

From 306fe6c320ec846544b25130a2fe61e3d394cb6f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 9 Dec 2015 13:58:42 +0000
Subject: [PATCH 623/797] arm64: irq: fix walking from irq stack to task stack

Running with CONFIG_DEBUG_SPINLOCK=y can trigger a BUG with the new IRQ
stack code:

  BUG: spinlock lockup suspected on CPU#1

This is due to the IRQ_STACK_TO_TASK_STACK macro incorrectly retrieving
the task stack pointer stashed at the top of the IRQ stack.

Sayeth James:

| Yup, this is what is happening. Its an off-by-one due to broken
| thinking about how the stack works. My broken thinking was:
|
| >   top ------------
| >       | dummy_lr | <- irq_stack_ptr
| >       ------------
| >       |   x29    |
| >       ------------
| >       |   x19    | <- irq_stack_ptr - 0x10
| >       ------------
| >       |   xzr    |
| >       ------------
|
| But the stack-pointer is decreased before use. So it actually looks
| like this:
|
| >       ------------
| >       |          |  <- irq_stack_ptr
| >   top ------------
| >       | dummy_lr |
| >       ------------
| >       |   x29    | <- irq_stack_ptr - 0x10
| >       ------------
| >       |   x19    |
| >       ------------
| >       |   xzr    | <- irq_stack_ptr - 0x20
| >       ------------
|
| The value being used as the original stack is x29, which in all the
| tests is sp but without the current frames data, hence there are no
| missing frames in the output.
|
| Jungseok Lee picked it up with a 32bit user space because aarch32
| can't use x29, so it remains 0 forever. The fix he posted is correct.

This patch fixes the macro and adds some of this wisdom to a comment,
so that the layout of the IRQ stack is well understood.

Cc: James Morse <james.morse@arm.com>
Reported-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 7596abf2e5661d52c4f414f37addeed54e098880)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h | 20 ++++++++++++++++++--
 arch/arm64/kernel/entry.S    |  2 +-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index fa2a8d0e4792..877c7e358384 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -19,7 +19,23 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
 
 /*
  * The highest address on the stack, and the first to be used. Used to
- * find the dummy-stack frame put down by el?_irq() in entry.S.
+ * find the dummy-stack frame put down by el?_irq() in entry.S, which
+ * is structured as follows:
+ *
+ *       ------------
+ *       |          |  <- irq_stack_ptr
+ *   top ------------
+ *       |  elr_el1 |
+ *       ------------
+ *       |   x29    | <- irq_stack_ptr - 0x10
+ *       ------------
+ *       |   xzr    |
+ *       ------------
+ *       |   x19    | <- irq_stack_ptr - 0x20
+ *       ------------
+ *
+ * where x19 holds a copy of the task stack pointer.
+ *
  */
 #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
 
@@ -27,7 +43,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
  * The offset from irq_stack_ptr where entry.S will store the original
  * stack pointer. Used by unwind_frame() and dump_backtrace().
  */
-#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x10));
+#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x20));
 
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 8f7e737949fe..be7ec544b540 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -199,7 +199,7 @@ alternative_endif
 	/* Add a dummy stack frame */
 	stp     x29, \dummy_lr, [sp, #-16]!           // dummy stack frame
 	mov	x29, sp
-	stp     xzr, x19, [sp, #-16]!
+	stp     x19, xzr, [sp, #-16]!
 
 9998:
 	.endm

From 95e1db8bd78d2b3f15f7d4e7896735a041c775f6 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 10 Dec 2015 10:22:39 +0000
Subject: [PATCH 624/797] arm64: Add this_cpu_ptr() assembler macro for use in
 entry.S

irq_stack is a per_cpu variable that needs to be accessed from entry.S.
Use an assembler macro instead of the unreadable details.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit aa4d5d3cbc258c355151a3903211b27359390ec5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/assembler.h | 11 +++++++++++
 arch/arm64/kernel/entry.S          |  4 +---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 12eff928ef8b..bb7b72734c24 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -193,6 +193,17 @@ lr	.req	x30		// link register
 	str	\src, [\tmp, :lo12:\sym]
 	.endm
 
+	/*
+	 * @sym: The name of the per-cpu variable
+	 * @reg: Result of per_cpu(sym, smp_processor_id())
+	 * @tmp: scratch register
+	 */
+	.macro this_cpu_ptr, sym, reg, tmp
+	adr_l	\reg, \sym
+	mrs	\tmp, tpidr_el1
+	add	\reg, \reg, \tmp
+	.endm
+
 /*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index be7ec544b540..e394f8c9595a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -179,9 +179,7 @@ alternative_endif
 	.macro	irq_stack_entry, dummy_lr
 	mov	x19, sp			// preserve the original sp
 
-	adr_l	x25, irq_stack
-	mrs	x26, tpidr_el1
-	add	x25, x25, x26
+	this_cpu_ptr irq_stack, x25, x26
 
 	/*
 	 * Check the lowest address on irq_stack for the irq_count value,

From e330d15430acce6073bb2c8486fba7555be1e923 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 10 Dec 2015 10:22:40 +0000
Subject: [PATCH 625/797] arm64: when walking onto the task stack, check sp &
 fp are in current->stack

When unwind_frame() reaches the bottom of the irq_stack, the last fp
points to the original task stack. unwind_frame() uses
IRQ_STACK_TO_TASK_STACK() to find the sp value. If either value is
wrong, we may end up walking a corrupt stack.

Check these values are sane by testing if they are both on the stack
pointed to by current->stack.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 1ffe199b1c9b72a8e752a9ae2a7af10128ab2ca1)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/stacktrace.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b947eeffa5b2..d916d5b6aef6 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -71,9 +71,17 @@ int notrace unwind_frame(struct stackframe *frame)
 	 * to task stack.
 	 * If we reach the end of the stack - and its an interrupt stack,
 	 * read the original task stack pointer from the dummy frame.
+	 *
+	 * Check the frame->fp we read from the bottom of the irq_stack,
+	 * and the original task stack pointer are both in current->stack.
 	 */
-	if (frame->sp == irq_stack_ptr)
-		frame->sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+	if (frame->sp == irq_stack_ptr) {
+		unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
+		if(object_is_on_stack((void *)orig_sp) &&
+		   object_is_on_stack((void *)frame->fp))
+			frame->sp = orig_sp;
+	}
 
 	return 0;
 }

From 70dfc6968ad22e057520da92b7b4da86041d3ea7 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 10 Dec 2015 10:22:41 +0000
Subject: [PATCH 626/797] arm64: don't call C code with el0's fp register

On entry from el0, we save all the registers on the kernel stack, and
restore them before returning. x29 remains unchanged when we call out
to C code, which will store x29 as the frame-pointer on the stack.

Instead, write 0 into x29 after entry from el0, to avoid any risk of
tracing into user space.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 49003a8d6b35e128ef5e51433e60e783a46fbe5f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/entry.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e394f8c9595a..2284c296e3f7 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -93,6 +93,8 @@
 	and	tsk, tsk, #~(THREAD_SIZE - 1)	// Ensure MDSCR_EL1.SS is clear,
 	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
+
+	mov	x29, xzr			// fp pointed to user-space
 	.else
 	add	x21, sp, #S_FRAME_SIZE
 	.endif

From 2949f7a8a1516b704750cd343aebb36de2428d38 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Dec 2015 12:44:36 +0000
Subject: [PATCH 627/797] arm64: mm: remove pointless PAGE_MASKing

As pgd_offset{,_k} shift the input address by PGDIR_SHIFT, the sub-page
bits will always be shifted out. There is no need to apply PAGE_MASK
before this.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit e2c30ee320eb96304896c7ab84499e5bc5e5fb6e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d2a6194f4bec..c5bd5bca8e3d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -288,7 +288,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
+	__create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
 			 size, prot, early_alloc);
 }
 
@@ -309,7 +309,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 
-	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
+	return __create_mapping(&init_mm, pgd_offset_k(virt),
 				phys, virt, size, prot, late_alloc);
 }
 

From a2151a0e23afcaf1fbb8f2e63bf2335f61e12172 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Dec 2015 12:44:37 +0000
Subject: [PATCH 628/797] arm64: Remove redundant padding from linker script

Currently we place an ALIGN_DEBUG_RO between text and data for the .text
and .init sections, and depending on configuration each of these may
result in up to SECTION_SIZE bytes worth of padding (for
DEBUG_RODATA_ALIGN).

We make no distinction between the text and data in each of these
sections at any point when creating the initial page tables in head.S.
We also make no distinction when modifying the tables; __map_memblock,
fixup_executable, mark_rodata_ro, and fixup_init only work at section
granularity. Thus this padding is unnecessary.

For the split between init text and data we impose a minimum alignment of
16 bytes, but this is also unnecessary. The init data is output
immediately after the padding before any symbols are defined, so this is
not required to keep a symbol for a linker section array correctly
associated with the data. Any objects within the section will be given
at least their usual alignment regardless.

This patch removes the redundant padding.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 5b28cd9d084eca8ddc46270d2720305bfd40e348)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71426a78db12..cc2572db32a6 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -113,7 +113,6 @@ SECTIONS
 		*(.got)			/* Global offset table		*/
 	}
 
-	ALIGN_DEBUG_RO
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
@@ -128,7 +127,6 @@ SECTIONS
 		ARM_EXIT_KEEP(EXIT_TEXT)
 	}
 
-	ALIGN_DEBUG_RO_MIN(16)
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)

From 9250d09be9e3f467145584fd476a6132e04ddaef Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Dec 2015 12:44:38 +0000
Subject: [PATCH 629/797] arm64: mm: fold alternatives into .init

Currently we treat the alternatives separately from other data that's
only used during initialisation, using separate .altinstructions and
.altinstr_replacement linker sections. These are freed for general
allocation separately from .init*. This is problematic as:

* We do not remove execute permissions, as we do for .init, leaving the
  memory executable.

* We pad between them, making the kernel Image binary up to PAGE_SIZE
  bytes larger than necessary.

This patch moves the two sections into the contiguous region used for
.init*. This saves some memory, ensures that we remove execute
permissions, and allows us to remove some code made redundant by this
reorganisation.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 9aa4ec1571da62366cfddc20f3b923609604fe63)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/alternative.h | 1 -
 arch/arm64/kernel/alternative.c      | 6 ------
 arch/arm64/kernel/vmlinux.lds.S      | 5 ++---
 arch/arm64/mm/init.c                 | 1 -
 4 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index d56ec0715157..e4962f04201e 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -19,7 +19,6 @@ struct alt_instr {
 
 void __init apply_alternatives_all(void);
 void apply_alternatives(void *start, size_t length);
-void free_alternatives_memory(void);
 
 #define ALTINSTR_ENTRY(feature)						      \
 	" .word 661b - .\n"				/* label           */ \
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index ab9db0e9818c..d2ee1b21a10d 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length)
 
 	__apply_alternatives(&region);
 }
-
-void free_alternatives_memory(void)
-{
-	free_reserved_area(__alt_instructions, __alt_instructions_end,
-			   0, "alternatives");
-}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index cc2572db32a6..e3928f578891 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -141,9 +141,6 @@ SECTIONS
 
 	PERCPU_SECTION(L1_CACHE_BYTES)
 
-	. = ALIGN(PAGE_SIZE);
-	__init_end = .;
-
 	. = ALIGN(4);
 	.altinstructions : {
 		__alt_instructions = .;
@@ -155,6 +152,8 @@ SECTIONS
 	}
 
 	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
 	_data = .;
 	_sdata = .;
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 10fab52eed95..dba32ceff17a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -360,7 +360,6 @@ void free_initmem(void)
 {
 	fixup_init();
 	free_initmem_default(0);
-	free_alternatives_memory();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD

From c92251d1a82c7c071662ca90300ebf488ab3d6f1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 10 Dec 2015 16:54:32 +0000
Subject: [PATCH 630/797] arm64: cmpxchg: Don't incldue linux/mmdebug.h

The arm64 asm/cmpxchg.h includes linux/mmdebug.h but doesn't so far as I
can tell actually use anything from it.  Removing the inclusion reduces
spurious header dependency rebuilds and also avoids issues with
recursive inclusions of headers causing build breaks due to attempts to
use things before they are defined if linux/mmdebug.h starts pulling in
more low level headers.

Such errors have happened in -next recently, for example:

In file included from include/linux/completion.h:11:0,
                 from include/linux/rcupdate.h:43,
                 from include/linux/tracepoint.h:19,
                 from include/linux/mmdebug.h:6,
                 from ./arch/arm64/include/asm/cmpxchg.h:22,
                 from ./arch/arm64/include/asm/atomic.h:41,
                 from include/linux/atomic.h:4,
                 from include/linux/spinlock.h:406,
                 from include/linux/seqlock.h:35,
                 from include/linux/time.h:5,
                 from include/uapi/linux/timex.h:56,
                 from include/linux/timex.h:56,
                 from include/linux/sched.h:19,
                 from arch/arm64/kernel/asm-offsets.c:21:
include/linux/wait.h: In function 'wait_on_atomic_t':
include/linux/wait.h:1218:2: error: implicit declaration of function 'atomic_read' [-Werror=implicit-function-declaration]
 if (atomic_read(val) == 0)

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 4a6ccf30263f4e265c0f171561bf4c40bed5f273)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cmpxchg.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 9ea611ea69df..510c7b404454 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -19,7 +19,6 @@
 #define __ASM_CMPXCHG_H
 
 #include <linux/bug.h>
-#include <linux/mmdebug.h>
 
 #include <asm/atomic.h>
 #include <asm/barrier.h>

From 9429ab599551a430b8e6d9d8bfccfc9f31288211 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 11 Dec 2015 11:04:31 +0000
Subject: [PATCH 631/797] arm64: mm: place __cpu_setup in .text

We drop __cpu_setup in .text.init, which ends up being part of .text.
The .text.init section was a legacy section name which has been unused
elsewhere for a long time.

The ".text.init" name is misleading if read as a synonym for
".init.text". Any CPU may execute __cpu_setup before turning the MMU on,
so it should simply live in .text.

Remove the pointless section assignment. This will leave __cpu_setup in
the .text section.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit f00083cae331e5d3eecade6b4fdc35d0825e73ef)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/proc.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index b8f04b3f2786..c164d2cb35c0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -140,8 +140,6 @@ ENTRY(cpu_do_switch_mm)
 	ret
 ENDPROC(cpu_do_switch_mm)
 
-	.section ".text.init", #alloc, #execinstr
-
 /*
  *	__cpu_setup
  *

From 7ae24aa87b5b0c58d378b6fc8fb0d8e72fbfd81d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 17 Nov 2015 14:45:47 +0000
Subject: [PATCH 632/797] arm64: Documentation: add list of software
 workarounds for errata

It's not immediately obvious which hardware errata are worked around in
the Linux kernel for an arbitrary kernel tree, so add a file to keep
track of what we're working around.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 9cb9c9e5ba8453537e8e645318edf231fe54eaf9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/arm64/silicon-errata.txt | 58 ++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 Documentation/arm64/silicon-errata.txt

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
new file mode 100644
index 000000000000..58b71ddf9b60
--- /dev/null
+++ b/Documentation/arm64/silicon-errata.txt
@@ -0,0 +1,58 @@
+                Silicon Errata and Software Workarounds
+                =======================================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date  : 27 November 2015
+
+It is an unfortunate fact of life that hardware is often produced with
+so-called "errata", which can cause it to deviate from the architecture
+under specific circumstances.  For hardware produced by ARM, these
+errata are broadly classified into the following categories:
+
+  Category A: A critical error without a viable workaround.
+  Category B: A significant or critical error with an acceptable
+              workaround.
+  Category C: A minor error that is not expected to occur under normal
+              operation.
+
+For more information, consult one of the "Software Developers Errata
+Notice" documents available on infocenter.arm.com (registration
+required).
+
+As far as Linux is concerned, Category B errata may require some special
+treatment in the operating system. For example, avoiding a particular
+sequence of code, or configuring the processor in a particular way. A
+less common situation may require similar actions in order to declassify
+a Category A erratum into a Category C erratum. These are collectively
+known as "software workarounds" and are only required in the minority of
+cases (e.g. those cases that both require a non-secure workaround *and*
+can be triggered by Linux).
+
+For software workarounds that may adversely impact systems unaffected by
+the erratum in question, a Kconfig entry is added under "Kernel
+Features" -> "ARM errata workarounds via the alternatives framework".
+These are enabled by default and patched in at runtime when an affected
+CPU is detected. For less-intrusive workarounds, a Kconfig option is not
+available and the code is structured (preferably with a comment) in such
+a way that the erratum will not be hit.
+
+This approach can make it slightly onerous to determine exactly which
+errata are worked around in an arbitrary kernel source tree, so this
+file acts as a registry of software workarounds in the Linux Kernel and
+will be updated when new workarounds are committed and backported to
+stable kernels.
+
+| Implementor    | Component       | Erratum ID      | Kconfig                 |
++----------------+-----------------+-----------------+-------------------------+
+| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319    |
+| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319    |
+| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069    |
+| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472    |
+| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719    |
+| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419    |
+| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
+| ARM            | Cortex-A57      | #852523         | N/A                     |
+| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+|                |                 |                 |                         |
+| Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
+| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |

From a79c216b06b18f1545d55e8f238dd9b49896f347 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 15 Dec 2015 11:21:25 +0000
Subject: [PATCH 633/797] arm64: reduce stack use in irq_handler

The code for switching to irq_stack stores three pieces of information on
the stack: fp+lr, as a fake stack frame (that lets us walk back onto the
interrupted task's stack frame), and the address of the struct pt_regs that
contains the register values from kernel entry (which dump_backtrace()
will print in any stack trace).

To reduce this, we store fp, and the pointer to the struct pt_regs.
unwind_frame() can recognise this as the irq_stack dummy frame, (as it only
appears at the top of the irq_stack), and use the struct pt_regs values
to find the missing interrupted link-register.

Suggested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 971c67ce37cfeeaf560e792a2c3bc21d8b67163a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h   | 11 ++++-------
 arch/arm64/kernel/entry.S      | 12 +++++++-----
 arch/arm64/kernel/stacktrace.c | 19 ++++++++++++++++---
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 877c7e358384..3bece4379bd9 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -25,16 +25,13 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
  *       ------------
  *       |          |  <- irq_stack_ptr
  *   top ------------
- *       |  elr_el1 |
+ *       |   x19    | <- irq_stack_ptr - 0x08
  *       ------------
  *       |   x29    | <- irq_stack_ptr - 0x10
  *       ------------
- *       |   xzr    |
- *       ------------
- *       |   x19    | <- irq_stack_ptr - 0x20
- *       ------------
  *
- * where x19 holds a copy of the task stack pointer.
+ * where x19 holds a copy of the task stack pointer where the struct pt_regs
+ * from kernel_entry can be found.
  *
  */
 #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
@@ -43,7 +40,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
  * The offset from irq_stack_ptr where entry.S will store the original
  * stack pointer. Used by unwind_frame() and dump_backtrace().
  */
-#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x20));
+#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
 
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2284c296e3f7..0667fb7d8bb1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -178,7 +178,7 @@ alternative_endif
 	mrs	\rd, sp_el0
 	.endm
 
-	.macro	irq_stack_entry, dummy_lr
+	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 
 	this_cpu_ptr irq_stack, x25, x26
@@ -196,10 +196,12 @@ alternative_endif
 	add	x26, x25, x26
 	mov	sp, x26
 
-	/* Add a dummy stack frame */
-	stp     x29, \dummy_lr, [sp, #-16]!           // dummy stack frame
+	/*
+	 * Add a dummy stack frame, this non-standard format is fixed up
+	 * by unwind_frame()
+	 */
+	stp     x29, x19, [sp, #-16]!
 	mov	x29, sp
-	stp     x19, xzr, [sp, #-16]!
 
 9998:
 	.endm
@@ -229,7 +231,7 @@ tsk	.req	x28		// current thread_info
 	.macro	irq_handler
 	ldr_l	x1, handle_arch_irq
 	mov	x0, sp
-	irq_stack_entry x22
+	irq_stack_entry
 	blr	x1
 	irq_stack_exit
 	.endm
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d916d5b6aef6..b9fd3a8abfc1 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -70,17 +70,30 @@ int notrace unwind_frame(struct stackframe *frame)
 	 * Check whether we are going to walk through from interrupt stack
 	 * to task stack.
 	 * If we reach the end of the stack - and its an interrupt stack,
-	 * read the original task stack pointer from the dummy frame.
+	 * unpack the dummy frame to find the original elr.
 	 *
 	 * Check the frame->fp we read from the bottom of the irq_stack,
 	 * and the original task stack pointer are both in current->stack.
 	 */
 	if (frame->sp == irq_stack_ptr) {
+		struct pt_regs *irq_args;
 		unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
 
-		if(object_is_on_stack((void *)orig_sp) &&
-		   object_is_on_stack((void *)frame->fp))
+		if (object_is_on_stack((void *)orig_sp) &&
+		   object_is_on_stack((void *)frame->fp)) {
 			frame->sp = orig_sp;
+
+			/* orig_sp is the saved pt_regs, find the elr */
+			irq_args = (struct pt_regs *)orig_sp;
+			frame->pc = irq_args->pc;
+		} else {
+			/*
+			 * This frame has a non-standard format, and we
+			 * didn't fix it, because the data looked wrong.
+			 * Refuse to output this frame.
+			 */
+			return -EINVAL;
+		}
 	}
 
 	return 0;

From ac7406c28c8bada863d36c46ca246bb7b76f3e9f Mon Sep 17 00:00:00 2001
From: Ashok Kumar <ashoks@broadcom.com>
Date: Thu, 17 Dec 2015 01:38:31 -0800
Subject: [PATCH 634/797] arm64: Defer dcache flush in __cpu_copy_user_page

Defer dcache flushing to __sync_icache_dcache by calling
flush_dcache_page which clears PG_dcache_clean flag.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit e6b1185f77351aa154e63bd54b05d07ff99d4ffa)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/copypage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 13bbc3be6f5a..22e4cb4d6f53 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -24,8 +24,9 @@
 
 void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
 {
+	struct page *page = virt_to_page(kto);
 	copy_page(kto, kfrom);
-	__flush_dcache_area(kto, PAGE_SIZE);
+	flush_dcache_page(page);
 }
 EXPORT_SYMBOL_GPL(__cpu_copy_user_page);
 

From 358e3c80a223c4d79a786be2e71e51cab91c2e7e Mon Sep 17 00:00:00 2001
From: Ashok Kumar <ashoks@broadcom.com>
Date: Thu, 17 Dec 2015 01:38:32 -0800
Subject: [PATCH 635/797] arm64: Use PoU cache instr for I/D coherency

In systems with three levels of cache (PoU at L1 and PoC at L3),
PoC cache flush instructions flush the L2 and L3 caches, which could affect
performance.
For cache flushes for I and D coherency, PoU should suffice,
so change all I and D coherency related cache flushes to PoU.

Introduced a new __clean_dcache_area_pou API for dcache flush till PoU
and provided a common macro for __flush_dcache_area and
__clean_dcache_area_pou.

Also, now in __sync_icache_dcache, icache invalidation for non-aliasing
VIPT icache is done only for that particular page instead of the earlier
__flush_icache_all.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 0a28714c53fd4f7aea709be7577dfbe0095c8c3e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	included reset_pmuserenr_el0 in arch/arm64/mm/proc-macros.S
---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/arm64/mm/cache.S               | 28 ++++++++++++++----------
 arch/arm64/mm/flush.c               | 33 ++++++++++++++++-------------
 arch/arm64/mm/proc-macros.S         | 22 +++++++++++++++++++
 4 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 54efedaf331f..7fc294c3bc5b 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
 static inline void flush_cache_mm(struct mm_struct *mm)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index cfa44a6adc0a..6df07069a025 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -81,25 +81,31 @@ ENDPROC(__flush_cache_user_range)
 /*
  *	__flush_dcache_area(kaddr, size)
  *
- *	Ensure that the data held in the page kaddr is written back to the
- *	page in question.
+ *	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *	are cleaned and invalidated to the PoC.
  *
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
 ENTRY(__flush_dcache_area)
-	dcache_line_size x2, x3
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:	dc	civac, x0			// clean & invalidate D line / unified line
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 ENDPIPROC(__flush_dcache_area)
 
+/*
+ *	__clean_dcache_area_pou(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are cleaned to the PoU.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__clean_dcache_area_pou)
+	dcache_by_line_op cvau, ish, x0, x1, x2, x3
+	ret
+ENDPROC(__clean_dcache_area_pou)
+
 /*
  *	__inval_cache_range(start, end)
  *	- start   - start address of region
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index c26b804015e8..46649d6e6c5a 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		__flush_icache_all();
 }
 
+static void sync_icache_aliases(void *kaddr, unsigned long len)
+{
+	unsigned long addr = (unsigned long)kaddr;
+
+	if (icache_is_aliasing()) {
+		__clean_dcache_area_pou(kaddr, len);
+		__flush_icache_all();
+	} else {
+		flush_icache_range(addr, addr + len);
+	}
+}
+
 static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 				unsigned long uaddr, void *kaddr,
 				unsigned long len)
 {
-	if (vma->vm_flags & VM_EXEC) {
-		unsigned long addr = (unsigned long)kaddr;
-		if (icache_is_aliasing()) {
-			__flush_dcache_area(kaddr, len);
-			__flush_icache_all();
-		} else {
-			flush_icache_range(addr, addr + len);
-		}
-	}
+	if (vma->vm_flags & VM_EXEC)
+		sync_icache_aliases(kaddr, len);
 }
 
 /*
@@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
 	if (!page_mapping(page))
 		return;
 
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
-		__flush_dcache_area(page_address(page),
-				PAGE_SIZE << compound_order(page));
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		sync_icache_aliases(page_address(page),
+				    PAGE_SIZE << compound_order(page));
+	else if (icache_is_aivivt())
 		__flush_icache_all();
-	} else if (icache_is_aivivt()) {
-		__flush_icache_all();
-	}
 }
 
 /*
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index d69dffffaa89..984edcda1850 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -74,3 +74,25 @@
 	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
 9000:
 	.endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [kaddr, kaddr + size)
+ *
+ * 	op:		operation passed to dc instruction
+ * 	domain:		domain used in dsb instruction
+ * 	kaddr:		starting virtual address of the region
+ * 	size:		size of the region
+ * 	Corrupts: 	kaddr, size, tmp1, tmp2
+ */
+	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+	dcache_line_size \tmp1, \tmp2
+	add	\size, \kaddr, \size
+	sub	\tmp2, \tmp1, #1
+	bic	\kaddr, \kaddr, \tmp2
+9998:	dc	\op, \kaddr
+	add	\kaddr, \kaddr, \tmp1
+	cmp	\kaddr, \size
+	b.lo	9998b
+	dsb	\domain
+	.endm

From 720089ef0ba8007c34dfa7d80d96e27d6d23088f Mon Sep 17 00:00:00 2001
From: David Woods <dwoods@ezchip.com>
Date: Thu, 17 Dec 2015 14:31:26 -0500
Subject: [PATCH 636/797] arm64: hugetlb: add support for PTE contiguous bit

The arm64 MMU supports a Contiguous bit which is a hint that the TTE
is one of a set of contiguous entries which can be cached in a single
TLB entry.  Supporting this bit adds new intermediate huge page sizes.

The set of huge page sizes available depends on the base page size.
Without using contiguous pages the huge page sizes are as follows.

 4KB:   2MB  1GB
64KB: 512MB

With a 4KB granule, the contiguous bit groups together sets of 16 pages
and with a 64KB granule it groups sets of 32 pages.  This enables two new
huge page sizes in each case, so that the full set of available sizes
is as follows.

 4KB:  64KB   2MB  32MB  1GB
64KB:   2MB 512MB  16GB

If a 16KB granule is used then the contiguous bit groups 128 pages
at the PTE level and 32 pages at the PMD level.

If the base page size is set to 64KB then 2MB pages are enabled by
default.  It is possible in the future to make 2MB the default huge
page size for both 4KB and 64KB granules.

Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: David Woods <dwoods@ezchip.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 66b3923a1a0f77a563b43f43f6ad091354abbfe9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                     |   3 -
 arch/arm64/include/asm/hugetlb.h       |  44 ++--
 arch/arm64/include/asm/pgtable-hwdef.h |  18 +-
 arch/arm64/include/asm/pgtable.h       |  10 +-
 arch/arm64/mm/hugetlbpage.c            | 274 ++++++++++++++++++++++++-
 include/linux/hugetlb.h                |   2 -
 6 files changed, 313 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4876459c0838..ffa3c549a4ba 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -530,9 +530,6 @@ config HW_PERF_EVENTS
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
-config ARCH_WANT_GENERAL_HUGETLB
-	def_bool y
-
 config ARCH_WANT_HUGE_PMD_SHARE
 	def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index bb4052e85dba..bbc1e35aa601 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
 	return *ptep;
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
 
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep)
-{
-	ptep_clear_flush(vma, addr, ptep);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	return ptep_get_and_clear(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
 
 static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
@@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 	clear_bit(PG_dcache_clean, &page->flags);
 }
 
+extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				struct page *page, int writable);
+#define arch_make_huge_pte arch_make_huge_pte
+extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte);
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+				      unsigned long addr, pte_t *ptep,
+				      pte_t pte, int dirty);
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pte_t *ptep);
+extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep);
+extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
+				  unsigned long addr, pte_t *ptep);
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index d6739e836f7b..5c25b831273d 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -90,7 +90,23 @@
 /*
  * Contiguous page definitions.
  */
-#define CONT_PTES		(_AC(1, UL) << CONT_SHIFT)
+#ifdef CONFIG_ARM64_64K_PAGES
+#define CONT_PTE_SHIFT		5
+#define CONT_PMD_SHIFT		5
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define CONT_PTE_SHIFT		7
+#define CONT_PMD_SHIFT		5
+#else
+#define CONT_PTE_SHIFT		4
+#define CONT_PMD_SHIFT		4
+#endif
+
+#define CONT_PTES		(1 << CONT_PTE_SHIFT)
+#define CONT_PTE_SIZE		(CONT_PTES * PAGE_SIZE)
+#define CONT_PTE_MASK		(~(CONT_PTE_SIZE - 1))
+#define CONT_PMDS		(1 << CONT_PMD_SHIFT)
+#define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
+#define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))
 /* the the numerical offset of the PTE within a range of CONT_PTES */
 #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index cd5dfc97268e..fd3d7c177c5f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -228,7 +228,8 @@ static inline pte_t pte_mkspecial(pte_t pte)
 
 static inline pte_t pte_mkcont(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_CONT));
+	pte = set_pte_bit(pte, __pgprot(PTE_CONT));
+	return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE));
 }
 
 static inline pte_t pte_mknoncont(pte_t pte)
@@ -236,6 +237,11 @@ static inline pte_t pte_mknoncont(pte_t pte)
 	return clear_pte_bit(pte, __pgprot(PTE_CONT));
 }
 
+static inline pmd_t pmd_mkcont(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
+}
+
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
@@ -309,7 +315,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 /*
  * Hugetlb definitions.
  */
-#define HUGE_MAX_HSTATE		2
+#define HUGE_MAX_HSTATE		4
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 383b03ff38f8..82d607c3614e 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -41,17 +41,289 @@ int pud_huge(pud_t pud)
 #endif
 }
 
+static int find_num_contig(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte, size_t *pgsize)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	pud_t *pud;
+	pmd_t *pmd;
+
+	*pgsize = PAGE_SIZE;
+	if (!pte_cont(pte))
+		return 1;
+	if (!pgd_present(*pgd)) {
+		VM_BUG_ON(!pgd_present(*pgd));
+		return 1;
+	}
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud)) {
+		VM_BUG_ON(!pud_present(*pud));
+		return 1;
+	}
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		VM_BUG_ON(!pmd_present(*pmd));
+		return 1;
+	}
+	if ((pte_t *)pmd == ptep) {
+		*pgsize = PMD_SIZE;
+		return CONT_PMDS;
+	}
+	return CONT_PTES;
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte)
+{
+	size_t pgsize;
+	int i;
+	int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize);
+	unsigned long pfn;
+	pgprot_t hugeprot;
+
+	if (ncontig == 1) {
+		set_pte_at(mm, addr, ptep, pte);
+		return;
+	}
+
+	pfn = pte_pfn(pte);
+	hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+	for (i = 0; i < ncontig; i++) {
+		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
+			 pte_val(pfn_pte(pfn, hugeprot)));
+		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
+		ptep++;
+		pfn += pgsize >> PAGE_SHIFT;
+		addr += pgsize;
+	}
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+		      unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return NULL;
+
+	if (sz == PUD_SIZE) {
+		pte = (pte_t *)pud;
+	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
+		pmd_t *pmd = pmd_alloc(mm, pud, addr);
+
+		WARN_ON(addr & (sz - 1));
+		/*
+		 * Note that if this code were ever ported to the
+		 * 32-bit arm platform then it will cause trouble in
+		 * the case where CONFIG_HIGHPTE is set, since there
+		 * will be no pte_unmap() to correspond with this
+		 * pte_alloc_map().
+		 */
+		pte = pte_alloc_map(mm, NULL, pmd, addr);
+	} else if (sz == PMD_SIZE) {
+		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
+		    pud_none(*pud))
+			pte = huge_pmd_share(mm, addr, pud);
+		else
+			pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
+		pmd_t *pmd;
+
+		pmd = pmd_alloc(mm, pud, addr);
+		WARN_ON(addr & (sz - 1));
+		return (pte_t *)pmd;
+	}
+
+	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
+	       sz, pte, pte_val(*pte));
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
+	if (!pgd_present(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+
+	if (pud_huge(*pud))
+		return (pte_t *)pud;
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		return NULL;
+
+	if (pte_cont(pmd_pte(*pmd))) {
+		pmd = pmd_offset(
+			pud, (addr & CONT_PMD_MASK));
+		return (pte_t *)pmd;
+	}
+	if (pmd_huge(*pmd))
+		return (pte_t *)pmd;
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_present(*pte) && pte_cont(*pte)) {
+		pte = pte_offset_kernel(
+			pmd, (addr & CONT_PTE_MASK));
+		return pte;
+	}
+	return NULL;
+}
+
+pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+			 struct page *page, int writable)
+{
+	size_t pagesize = huge_page_size(hstate_vma(vma));
+
+	if (pagesize == CONT_PTE_SIZE) {
+		entry = pte_mkcont(entry);
+	} else if (pagesize == CONT_PMD_SIZE) {
+		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
+	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
+		pr_warn("%s: unrecognized huge page size 0x%lx\n",
+			__func__, pagesize);
+	}
+	return entry;
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep)
+{
+	pte_t pte;
+
+	if (pte_cont(*ptep)) {
+		int ncontig, i;
+		size_t pgsize;
+		pte_t *cpte;
+		bool is_dirty = false;
+
+		cpte = huge_pte_offset(mm, addr);
+		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
+		/* save the 1st pte to return */
+		pte = ptep_get_and_clear(mm, addr, cpte);
+		for (i = 1; i < ncontig; ++i) {
+			/*
+			 * If HW_AFDBM is enabled, then the HW could
+			 * turn on the dirty bit for any of the page
+			 * in the set, so check them all.
+			 */
+			++cpte;
+			if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
+				is_dirty = true;
+		}
+		if (is_dirty)
+			return pte_mkdirty(pte);
+		else
+			return pte;
+	} else {
+		return ptep_get_and_clear(mm, addr, ptep);
+	}
+}
+
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+			       unsigned long addr, pte_t *ptep,
+			       pte_t pte, int dirty)
+{
+	pte_t *cpte;
+
+	if (pte_cont(pte)) {
+		int ncontig, i, changed = 0;
+		size_t pgsize = 0;
+		unsigned long pfn = pte_pfn(pte);
+		/* Select all bits except the pfn */
+		pgprot_t hugeprot =
+			__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
+				 pte_val(pte));
+
+		cpte = huge_pte_offset(vma->vm_mm, addr);
+		pfn = pte_pfn(*cpte);
+		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
+					  *cpte, &pgsize);
+		for (i = 0; i < ncontig; ++i, ++cpte) {
+			changed = ptep_set_access_flags(vma, addr, cpte,
+							pfn_pte(pfn,
+								hugeprot),
+							dirty);
+			pfn += pgsize >> PAGE_SHIFT;
+		}
+		return changed;
+	} else {
+		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	}
+}
+
+void huge_ptep_set_wrprotect(struct mm_struct *mm,
+			     unsigned long addr, pte_t *ptep)
+{
+	if (pte_cont(*ptep)) {
+		int ncontig, i;
+		pte_t *cpte;
+		size_t pgsize = 0;
+
+		cpte = huge_pte_offset(mm, addr);
+		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
+		for (i = 0; i < ncontig; ++i, ++cpte)
+			ptep_set_wrprotect(mm, addr, cpte);
+	} else {
+		ptep_set_wrprotect(mm, addr, ptep);
+	}
+}
+
+void huge_ptep_clear_flush(struct vm_area_struct *vma,
+			   unsigned long addr, pte_t *ptep)
+{
+	if (pte_cont(*ptep)) {
+		int ncontig, i;
+		pte_t *cpte;
+		size_t pgsize = 0;
+
+		cpte = huge_pte_offset(vma->vm_mm, addr);
+		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
+					  *cpte, &pgsize);
+		for (i = 0; i < ncontig; ++i, ++cpte)
+			ptep_clear_flush(vma, addr, cpte);
+	} else {
+		ptep_clear_flush(vma, addr, ptep);
+	}
+}
+
 static __init int setup_hugepagesz(char *opt)
 {
 	unsigned long ps = memparse(opt, &opt);
+
 	if (ps == PMD_SIZE) {
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
+		hugetlb_add_hstate(CONT_PTE_SHIFT);
+	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
+		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
-		pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
+		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 		return 0;
 	}
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static __init int add_default_hugepagesz(void)
+{
+	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
+		hugetlb_add_hstate(CONT_PMD_SHIFT);
+	return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 685c262e0be8..b0eb06423d5e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -96,9 +96,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
 				struct address_space *mapping,
 				pgoff_t idx, unsigned long address);
 
-#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
-#endif
 
 extern int hugepages_treat_as_movable;
 extern int sysctl_hugetlb_shm_group;

From da604e8646f553a4a2c7613491112b0c9b3ec400 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 18 Dec 2015 16:01:47 +0000
Subject: [PATCH 637/797] arm64: remove irq_count and do_softirq_own_stack()

sysrq_handle_reboot() re-enables interrupts while on the irq stack. The
irq_stack implementation wrongly assumed this would only ever happen
via the softirq path, allowing it to update irq_count late, in
do_softirq_own_stack().

This means if an irq occurs in sysrq_handle_reboot(), during
emergency_restart() the stack will be corrupted, as irq_count wasn't
updated.

Lose the optimisation, and instead of moving the adding/subtracting of
irq_count into irq_stack_entry/irq_stack_exit, remove it, and compare
sp_el0 (struct thread_info) with sp & ~(THREAD_SIZE - 1). This tells us
if we are on a task stack; if so, we can safely switch to the irq stack.
Finally, remove do_softirq_own_stack(), as we don't need it anymore.

Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
[will: use get_thread_info macro]
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit d224a69e3d80fe08f285d1f41d21b590bae4fa9f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h |  2 --
 arch/arm64/kernel/entry.S    | 19 +++++++++---------
 arch/arm64/kernel/irq.c      | 38 +-----------------------------------
 3 files changed, 11 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 3bece4379bd9..b77197d941fc 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -11,8 +11,6 @@
 #include <asm-generic/irq.h>
 #include <asm/thread_info.h>
 
-#define __ARCH_HAS_DO_SOFTIRQ
-
 struct pt_regs;
 
 DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 0667fb7d8bb1..c0db321db7e1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -181,19 +181,20 @@ alternative_endif
 	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 
-	this_cpu_ptr irq_stack, x25, x26
-
 	/*
-	 * Check the lowest address on irq_stack for the irq_count value,
-	 * incremented by do_softirq_own_stack if we have re-enabled irqs
-	 * while on the irq_stack.
+	 * Compare sp with the current thread_info, if the top
+	 * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
+	 * should switch to the irq stack.
 	 */
-	ldr	x26, [x25]
-	cbnz	x26, 9998f		// recursive use?
+	and	x25, x19, #~(THREAD_SIZE - 1)
+	cmp	x25, tsk
+	b.ne	9998f
 
-	/* switch to the irq stack */
+	this_cpu_ptr irq_stack, x25, x26
 	mov	x26, #IRQ_STACK_START_SP
 	add	x26, x25, x26
+
+	/* switch to the irq stack */
 	mov	sp, x26
 
 	/*
@@ -405,10 +406,10 @@ el1_irq:
 	bl	trace_hardirqs_off
 #endif
 
+	get_thread_info tsk
 	irq_handler
 
 #ifdef CONFIG_PREEMPT
-	get_thread_info tsk
 	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
 	cbnz	w24, 1f				// preempt count != 0
 	ldr	x0, [tsk, #TI_FLAGS]		// get flags
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index ff7ebb710e51..2386b26c0712 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -25,24 +25,14 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
 
 unsigned long irq_err_count;
 
-/*
- * irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned.
- * irq_stack[0] is used as irq_count, a non-zero value indicates the stack
- * is in use, and el?_irq() shouldn't switch to it. This is used to detect
- * recursive use of the irq_stack, it is lazily updated by
- * do_softirq_own_stack(), which is called on the irq_stack, before
- * re-enabling interrupts to process softirqs.
- */
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
 DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
 
-#define IRQ_COUNT()	(*per_cpu(irq_stack, smp_processor_id()))
-
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_list(p, prec);
@@ -66,29 +56,3 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
-
-/*
- * do_softirq_own_stack() is called from irq_exit() before __do_softirq()
- * re-enables interrupts, at which point we may re-enter el?_irq(). We
- * increase irq_count here so that el1_irq() knows that it is already on the
- * irq stack.
- *
- * Called with interrupts disabled, so we don't worry about moving cpu, or
- * being interrupted while modifying irq_count.
- *
- * This function doesn't actually switch stack.
- */
-void do_softirq_own_stack(void)
-{
-	int cpu = smp_processor_id();
-
-	WARN_ON_ONCE(!irqs_disabled());
-
-	if (on_irq_stack(current_stack_pointer, cpu)) {
-		IRQ_COUNT()++;
-		__do_softirq();
-		IRQ_COUNT()--;
-	} else {
-		__do_softirq();
-	}
-}

From 76f2d0af233200abc487122d0002f3c14d796676 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 15 Dec 2015 17:33:39 +0900
Subject: [PATCH 638/797] arm64: ftrace: modify a stack frame in a safe way

Function graph tracer modifies a return address (LR) in a stack frame by
calling ftrace_prepare_return() in a traced function's function prologue.
The current code does this modification before preserving an original
address at ftrace_push_return_trace() and there is always a small window
of inconsistency when an interrupt occurs.

This doesn't matter once an interrupt stack is introduced, because the
stack tracer won't be invoked in an interrupt context. But it would be
better to proactively minimize such a window by moving the LR modification
after ftrace_push_return_trace().

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 79fdee9b6355c9720f14717e1ad66af51bb331b5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/ftrace.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 8f7005bc35bd..ebecf9aa33d1 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -129,23 +129,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	 * on other archs. It's unlikely on AArch64.
 	 */
 	old = *parent;
-	*parent = return_hooker;
 
 	trace.func = self_addr;
 	trace.depth = current->curr_ret_stack + 1;
 
 	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		*parent = old;
+	if (!ftrace_graph_entry(&trace))
 		return;
-	}
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
 				       frame_pointer);
-	if (err == -EBUSY) {
-		*parent = old;
+	if (err == -EBUSY)
 		return;
-	}
+	else
+		*parent = return_hooker;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE

From 30e9fa2678d1ab96894aa1ee670c5a804fe5a29b Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 15 Dec 2015 17:33:40 +0900
Subject: [PATCH 639/797] arm64: pass a task parameter to unwind_frame()

Function graph tracer modifies a return address (LR) in a stack frame
to hook a function's return. This will result in many useless entries
(return_to_handler) showing up in a call stack list.
We will fix this problem in a later patch ("arm64: ftrace: fix a stack
tracer's output under function graph tracer"). But since real return
addresses are saved in the ret_stack[] array in struct task_struct,
unwind functions need to be told, in addition to a stack pointer
address, which task is being traced in order to find the real return
addresses.

This patch extends unwind functions' interfaces by adding an extra
argument of a pointer to task_struct.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit fe13f95b720075327a761fe6ddb45b0c90cab504)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/stacktrace.h | 6 ++++--
 arch/arm64/kernel/perf_callchain.c  | 2 +-
 arch/arm64/kernel/process.c         | 2 +-
 arch/arm64/kernel/return_address.c  | 2 +-
 arch/arm64/kernel/stacktrace.c      | 8 ++++----
 arch/arm64/kernel/time.c            | 2 +-
 arch/arm64/kernel/traps.c           | 2 +-
 7 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 7318f6d54aa9..6fb61c5090b4 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -16,14 +16,16 @@
 #ifndef __ASM_STACKTRACE_H
 #define __ASM_STACKTRACE_H
 
+struct task_struct;
+
 struct stackframe {
 	unsigned long fp;
 	unsigned long sp;
 	unsigned long pc;
 };
 
-extern int unwind_frame(struct stackframe *frame);
-extern void walk_stackframe(struct stackframe *frame,
+extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
+extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 3aa74830cc69..797220da912b 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -165,7 +165,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 
-	walk_stackframe(&frame, callchain_trace, entry);
+	walk_stackframe(current, &frame, callchain_trace, entry);
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f75b540bc3b4..98bf5461d4b6 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -348,7 +348,7 @@ unsigned long get_wchan(struct task_struct *p)
 	do {
 		if (frame.sp < stack_page ||
 		    frame.sp >= stack_page + THREAD_SIZE ||
-		    unwind_frame(&frame))
+		    unwind_frame(p, &frame))
 			return 0;
 		if (!in_sched_functions(frame.pc))
 			return frame.pc;
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 6c4fd2810ecb..07b37ac05be4 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -44,7 +44,7 @@ void *return_address(unsigned int level)
 	frame.sp = current_stack_pointer;
 	frame.pc = (unsigned long)return_address; /* dummy */
 
-	walk_stackframe(&frame, save_return_addr, &data);
+	walk_stackframe(current, &frame, save_return_addr, &data);
 
 	if (!data.level)
 		return data.addr;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b9fd3a8abfc1..f7ee597ec883 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -36,7 +36,7 @@
  *	ldp	x29, x30, [sp]
  *	add	sp, sp, #0x10
  */
-int notrace unwind_frame(struct stackframe *frame)
+int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
 	unsigned long high, low;
 	unsigned long fp = frame->fp;
@@ -99,7 +99,7 @@ int notrace unwind_frame(struct stackframe *frame)
 	return 0;
 }
 
-void notrace walk_stackframe(struct stackframe *frame,
+void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 		     int (*fn)(struct stackframe *, void *), void *data)
 {
 	while (1) {
@@ -107,7 +107,7 @@ void notrace walk_stackframe(struct stackframe *frame,
 
 		if (fn(frame, data))
 			break;
-		ret = unwind_frame(frame);
+		ret = unwind_frame(tsk, frame);
 		if (ret < 0)
 			break;
 	}
@@ -159,7 +159,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		frame.pc = (unsigned long)save_stack_trace_tsk;
 	}
 
-	walk_stackframe(&frame, save_trace, &data);
+	walk_stackframe(tsk, &frame, save_trace, &data);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 13339b6ffc1a..6e5c521f123a 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -53,7 +53,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 	do {
-		int ret = unwind_frame(&frame);
+		int ret = unwind_frame(NULL, &frame);
 		if (ret < 0)
 			return 0;
 	} while (in_lock_functions(frame.pc));
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8a0084541f84..937008523fa5 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -177,7 +177,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		int ret;
 
 		dump_backtrace_entry(where);
-		ret = unwind_frame(&frame);
+		ret = unwind_frame(tsk, &frame);
 		if (ret < 0)
 			break;
 		stack = frame.sp;

From 1f18836b775f8f041dcad620c1b5d18c22c25c06 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 15 Dec 2015 17:33:41 +0900
Subject: [PATCH 640/797] arm64: ftrace: fix a stack tracer's output under
 function graph tracer

Function graph tracer modifies a return address (LR) in a stack frame
to hook a function return. This will result in many useless entries
(return_to_handler) showing up in
 a) a stack tracer's output
 b) perf call graph (with perf record -g)
 c) dump_backtrace (at panic et al.)

For example, in case of a),
  $ echo function_graph > /sys/kernel/debug/tracing/current_tracer
  $ echo 1 > /proc/sys/kernel/stack_trace_enabled
  $ cat /sys/kernel/debug/tracing/stack_trace
        Depth    Size   Location    (54 entries)
        -----    ----   --------
  0)     4504      16   gic_raise_softirq+0x28/0x150
  1)     4488      80   smp_cross_call+0x38/0xb8
  2)     4408      48   return_to_handler+0x0/0x40
  3)     4360      32   return_to_handler+0x0/0x40
  ...

In case of b),
  $ echo function_graph > /sys/kernel/debug/tracing/current_tracer
  $ perf record -e mem:XXX:x -ag -- sleep 10
  $ perf report
                  ...
                  |          |          |--0.22%-- 0x550f8
                  |          |          |          0x10888
                  |          |          |          el0_svc_naked
                  |          |          |          sys_openat
                  |          |          |          return_to_handler
                  |          |          |          return_to_handler
                  ...

In case of c),
  $ echo function_graph > /sys/kernel/debug/tracing/current_tracer
  $ echo c > /proc/sysrq-trigger
  ...
  Call trace:
  [<ffffffc00044d3ac>] sysrq_handle_crash+0x24/0x30
  [<ffffffc000092250>] return_to_handler+0x0/0x40
  [<ffffffc000092250>] return_to_handler+0x0/0x40
  ...

This patch replaces such entries with real addresses preserved in
current->ret_stack[] at unwind_frame(). This way, we can cover all
the cases.

Reviewed-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
[will: fixed minor context changes conflicting with irq stack bits]
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit 20380bb390a443b2c5c8800cec59743faf8151b4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/ftrace.h     |  2 ++
 arch/arm64/include/asm/stacktrace.h |  3 +++
 arch/arm64/kernel/perf_callchain.c  |  3 +++
 arch/arm64/kernel/process.c         |  3 +++
 arch/arm64/kernel/return_address.c  |  3 +++
 arch/arm64/kernel/stacktrace.c      | 17 +++++++++++++++++
 arch/arm64/kernel/time.c            |  3 +++
 arch/arm64/kernel/traps.c           | 26 ++++++++++++++++++++------
 8 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index c5534facf941..3c60f37e48ab 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -28,6 +28,8 @@ struct dyn_arch_ftrace {
 
 extern unsigned long ftrace_graph_call;
 
+extern void return_to_handler(void);
+
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 6fb61c5090b4..801a16dbbdf6 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -22,6 +22,9 @@ struct stackframe {
 	unsigned long fp;
 	unsigned long sp;
 	unsigned long pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	unsigned int graph;
+#endif
 };
 
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 797220da912b..ff4665462a02 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -164,6 +164,9 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	frame.fp = regs->regs[29];
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = current->curr_ret_stack;
+#endif
 
 	walk_stackframe(current, &frame, callchain_trace, entry);
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 98bf5461d4b6..88d742ba19d5 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -344,6 +344,9 @@ unsigned long get_wchan(struct task_struct *p)
 	frame.fp = thread_saved_fp(p);
 	frame.sp = thread_saved_sp(p);
 	frame.pc = thread_saved_pc(p);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = p->curr_ret_stack;
+#endif
 	stack_page = (unsigned long)task_stack_page(p);
 	do {
 		if (frame.sp < stack_page ||
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 07b37ac05be4..1718706fde83 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -43,6 +43,9 @@ void *return_address(unsigned int level)
 	frame.fp = (unsigned long)__builtin_frame_address(0);
 	frame.sp = current_stack_pointer;
 	frame.pc = (unsigned long)return_address; /* dummy */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = current->curr_ret_stack;
+#endif
 
 	walk_stackframe(current, &frame, save_return_addr, &data);
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index f7ee597ec883..4fad9787ab46 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -17,6 +17,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/ftrace.h>
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 
@@ -66,6 +67,19 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	frame->fp = *(unsigned long *)(fp);
 	frame->pc = *(unsigned long *)(fp + 8);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (tsk && tsk->ret_stack &&
+			(frame->pc == (unsigned long)return_to_handler)) {
+		/*
+		 * This is a case where function graph tracer has
+		 * modified a return address (LR) in a stack frame
+		 * to hook a function return.
+		 * So replace it to an original value.
+		 */
+		frame->pc = tsk->ret_stack[frame->graph--].ret;
+	}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 	/*
 	 * Check whether we are going to walk through from interrupt stack
 	 * to task stack.
@@ -158,6 +172,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		frame.sp = current_stack_pointer;
 		frame.pc = (unsigned long)save_stack_trace_tsk;
 	}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = tsk->curr_ret_stack;
+#endif
 
 	walk_stackframe(tsk, &frame, save_trace, &data);
 	if (trace->nr_entries < trace->max_entries)
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 6e5c521f123a..59779699a1a4 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -52,6 +52,9 @@ unsigned long profile_pc(struct pt_regs *regs)
 	frame.fp = regs->regs[29];
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = -1; /* no task info */
+#endif
 	do {
 		int ret = unwind_frame(NULL, &frame);
 		if (ret < 0)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 937008523fa5..bdc293f6adc4 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -147,17 +147,14 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
 	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	int skip;
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
 	if (!tsk)
 		tsk = current;
 
-	if (regs) {
-		frame.fp = regs->regs[29];
-		frame.sp = regs->sp;
-		frame.pc = regs->pc;
-	} else if (tsk == current) {
+	if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
 		frame.sp = current_stack_pointer;
 		frame.pc = (unsigned long)dump_backtrace;
@@ -169,14 +166,31 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		frame.sp = thread_saved_sp(tsk);
 		frame.pc = thread_saved_pc(tsk);
 	}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = tsk->curr_ret_stack;
+#endif
 
+	skip = !!regs;
 	pr_emerg("Call trace:\n");
 	while (1) {
 		unsigned long where = frame.pc;
 		unsigned long stack;
 		int ret;
 
-		dump_backtrace_entry(where);
+		/* skip until specified stack frame */
+		if (!skip) {
+			dump_backtrace_entry(where);
+		} else if (frame.fp == regs->regs[29]) {
+			skip = 0;
+			/*
+			 * Mostly, this is the case where this function is
+			 * called in panic/abort. As exception handler's
+			 * stack frame does not contain the corresponding pc
+			 * at which an exception has taken place, use regs->pc
+			 * instead.
+			 */
+			dump_backtrace_entry(regs->pc);
+		}
 		ret = unwind_frame(tsk, &frame);
 		if (ret < 0)
 			break;

From d2b08280e2c1f40fa209aaaaf86c9b730de28204 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 21 Dec 2015 16:44:27 +0000
Subject: [PATCH 641/797] arm64: traps: address fallout from printk -> pr_*
 conversion

Commit ac7b406c1a9d ("arm64: Use pr_* instead of printk") was a fairly
mindless s/printk/pr_*/ change driven by a complaint from checkpatch.

As is usual with such changes, this has led to some odd behaviour on
arm64:

  * syslog now picks up the "pr_emerg" line from dump_backtrace, but not
    the actual trace, which leads to a bunch of "kernel:Call trace:"
    lines in the log

  * __{pte,pmd,pgd}_error print at KERN_CRIT, as opposed to KERN_ERR
    which is used by other architectures.

This patch restores the original printk behaviour for dump_backtrace
and downgrades the pgtable error macros to KERN_ERR.

Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit c9cd0ed925c0b927283d4739bfe689eb9d1e9dfd)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/traps.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index bdc293f6adc4..cbedd724f48e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -171,7 +171,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 #endif
 
 	skip = !!regs;
-	pr_emerg("Call trace:\n");
+	printk("Call trace:\n");
 	while (1) {
 		unsigned long where = frame.pc;
 		unsigned long stack;
@@ -482,22 +482,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 
 void __pte_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pte %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
 }
 
 void __pmd_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pmd %016lx.\n", file, line, val);
 }
 
 void __pud_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pud %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pud %016lx.\n", file, line, val);
 }
 
 void __pgd_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pgd %016lx.\n", file, line, val);
 }
 
 /* GENERIC_BUG traps */

From 12037cecc2cfc56e01851c0fbf605be9e05bbe95 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 5 Jan 2016 10:18:51 +0100
Subject: [PATCH 642/797] arm64: module: fix relocation of movz instruction
 with negative immediate

The test whether a movz instruction with a signed immediate should be
turned into a movn instruction (i.e., when the immediate is negative)
is flawed, since the value of imm is always positive. Also, the
subsequent bounds check is incorrect since the limit update never
executes, due to the fact that the imm_type comparison will always be
false for negative signed immediates.

Let's fix this by performing the sign test on sval directly, and
replacing the bounds check with a simple comparison against U16_MAX.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: tidied up use of sval, renamed MOVK enum value to MOVKZ]
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit b24a557527f97ad88619d5bd4c8017c635056d69)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/module.c | 51 ++++++++++++++------------------------
 1 file changed, 18 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f4bc779e62e8..03464ab0fff2 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -30,9 +30,6 @@
 #include <asm/insn.h>
 #include <asm/sections.h>
 
-#define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX
-#define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16
-
 void *module_alloc(unsigned long size)
 {
 	void *p;
@@ -110,16 +107,20 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 	return 0;
 }
 
+enum aarch64_insn_movw_imm_type {
+	AARCH64_INSN_IMM_MOVNZ,
+	AARCH64_INSN_IMM_MOVKZ,
+};
+
 static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
-			   int lsb, enum aarch64_insn_imm_type imm_type)
+			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
 {
-	u64 imm, limit = 0;
+	u64 imm;
 	s64 sval;
 	u32 insn = le32_to_cpu(*(u32 *)place);
 
 	sval = do_reloc(op, place, val);
-	sval >>= lsb;
-	imm = sval & 0xffff;
+	imm = sval >> lsb;
 
 	if (imm_type == AARCH64_INSN_IMM_MOVNZ) {
 		/*
@@ -128,7 +129,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
 		 * immediate is less than zero.
 		 */
 		insn &= ~(3 << 29);
-		if ((s64)imm >= 0) {
+		if (sval >= 0) {
 			/* >=0: Set the instruction to MOVZ (opcode 10b). */
 			insn |= 2 << 29;
 		} else {
@@ -140,29 +141,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
 			 */
 			imm = ~imm;
 		}
-		imm_type = AARCH64_INSN_IMM_MOVK;
 	}
 
 	/* Update the instruction with the new encoding. */
-	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
+	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
 	*(u32 *)place = cpu_to_le32(insn);
 
-	/* Shift out the immediate field. */
-	sval >>= 16;
-
-	/*
-	 * For unsigned immediates, the overflow check is straightforward.
-	 * For signed immediates, the sign bit is actually the bit past the
-	 * most significant bit of the field.
-	 * The AARCH64_INSN_IMM_16 immediate type is unsigned.
-	 */
-	if (imm_type != AARCH64_INSN_IMM_16) {
-		sval++;
-		limit++;
-	}
-
-	/* Check the upper bits depending on the sign of the immediate. */
-	if ((u64)sval > limit)
+	if (imm > U16_MAX)
 		return -ERANGE;
 
 	return 0;
@@ -267,25 +252,25 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			overflow_check = false;
 		case R_AARCH64_MOVW_UABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_UABS_G1_NC:
 			overflow_check = false;
 		case R_AARCH64_MOVW_UABS_G1:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_UABS_G2_NC:
 			overflow_check = false;
 		case R_AARCH64_MOVW_UABS_G2:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_UABS_G3:
 			/* We're using the top bits so we can't overflow. */
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_SABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
@@ -302,7 +287,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_MOVW_PREL_G0_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVK);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_PREL_G0:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
@@ -311,7 +296,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_MOVW_PREL_G1_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVK);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_PREL_G1:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
@@ -320,7 +305,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_MOVW_PREL_G2_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVK);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_PREL_G2:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,

From 3fd9316702a82b498fcf7055f9781589ea6c1e1c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 5 Jan 2016 10:18:52 +0100
Subject: [PATCH 643/797] arm64: module: avoid undefined shift behavior in
 reloc_data()

Compilers may engage the improbability drive when encountering shifts
by a distance that is a multiple of the size of the operand type. Since
the required bounds check is very simple here, we can get rid of all the
fuzzy masking, shifting and comparing, and use the documented bounds
directly.

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit f930896967fa3f9ab16a6f87267b92798308d48f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/module.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 03464ab0fff2..93e970231ca9 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -72,15 +72,18 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val)
 
 static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 {
-	u64 imm_mask = (1 << len) - 1;
 	s64 sval = do_reloc(op, place, val);
 
 	switch (len) {
 	case 16:
 		*(s16 *)place = sval;
+		if (sval < S16_MIN || sval > U16_MAX)
+			return -ERANGE;
 		break;
 	case 32:
 		*(s32 *)place = sval;
+		if (sval < S32_MIN || sval > U32_MAX)
+			return -ERANGE;
 		break;
 	case 64:
 		*(s64 *)place = sval;
@@ -89,21 +92,6 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 		pr_err("Invalid length (%d) for data relocation\n", len);
 		return 0;
 	}
-
-	/*
-	 * Extract the upper value bits (including the sign bit) and
-	 * shift them to bit 0.
-	 */
-	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1);
-
-	/*
-	 * Overflow has occurred if the value is not representable in
-	 * len bits (i.e the bottom len bits are not sign-extended and
-	 * the top bits are not all zero).
-	 */
-	if ((u64)(sval + 1) > 2)
-		return -ERANGE;
-
 	return 0;
 }
 

From 87e4c1f363cfd0ed3a673d47f229725a6b0946d7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 5 Jan 2016 15:36:59 +0000
Subject: [PATCH 644/797] arm64: mm: move pgd_cache initialisation to
 pgtable_cache_init

Initialising the support for EFI runtime services requires us to
allocate a pgd off the back of an early_initcall. On systems where the
PGD_SIZE is smaller than PAGE_SIZE (e.g. 64k pages and 48-bit VA), the
pgd_cache isn't initialised at this stage, and we panic with a NULL
dereference during boot:

  Unable to handle kernel NULL pointer dereference at virtual address 00000000

  __create_mapping.isra.5+0x84/0x350
  create_pgd_mapping+0x20/0x28
  efi_create_mapping+0x5c/0x6c
  arm_enable_runtime_services+0x154/0x1e4
  do_one_initcall+0x8c/0x190
  kernel_init_freeable+0x84/0x1ec
  kernel_init+0x10/0xe0
  ret_from_fork+0x10/0x50

This patch fixes the problem by initialising the pgd_cache earlier, in
the pgtable_cache_init callback, which sounds suspiciously like what it
was intended for.

Reported-by: Dennis Chen <dennis.chen@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 39b5be9b4233a9f212b98242bddf008f379b5122)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h |  3 ++-
 arch/arm64/mm/pgd.c              | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index fd3d7c177c5f..76ff5d93c6c3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -682,7 +682,8 @@ extern int kern_addr_valid(unsigned long addr);
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init() do { } while (0)
+void pgd_cache_init(void);
+#define pgtable_cache_init	pgd_cache_init
 
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index cb3ba1b812e7..ae11d4e03d0e 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 		kmem_cache_free(pgd_cache, pgd);
 }
 
-static int __init pgd_cache_init(void)
+void __init pgd_cache_init(void)
 {
+	if (PGD_SIZE == PAGE_SIZE)
+		return;
+
 	/*
 	 * Naturally aligned pgds required by the architecture.
 	 */
-	if (PGD_SIZE != PAGE_SIZE)
-		pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
-					      SLAB_PANIC, NULL);
-	return 0;
+	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
+				      SLAB_PANIC, NULL);
 }
-core_initcall(pgd_cache_init);

From ec567e8f53127f17d757c1ac30cfe09db6d1ea66 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 5 Jan 2016 17:33:34 +0000
Subject: [PATCH 645/797] arm64: entry: remove pointless SPSR mode check

In work_pending, we may skip work if the stacked SPSR value represents
anything other than an EL0 context. We then immediately invoke the
kernel_exit 0 macro as part of ret_to_user, assuming a return to EL0.
This is somewhat confusing.

We use work_pending as part of the ret_to_user/ret_fast_syscall state
machine. We only use ret_fast_syscall in the return from an SVC issued
from EL0. We use ret_to_user for return from EL0 exception handlers and
also for return from ret_from_fork in the case the task was not a kernel
thread (i.e. it is a user task).

Thus in all cases the stacked SPSR value must represent an EL0 context,
and the check is redundant. This patch removes it, along with the now
unused no_work_pending label.

Cc: Chris Metcalf <cmetcalf@ezchip.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit ee03353bc04f8e460cc4e3da80d9721d9ecb89f1)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/entry.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c0db321db7e1..1f7f5a2b61bf 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -676,10 +676,7 @@ ret_fast_syscall_trace:
 work_pending:
 	tbnz	x1, #TIF_NEED_RESCHED, work_resched
 	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
-	ldr	x2, [sp, #S_PSTATE]
 	mov	x0, sp				// 'regs'
-	tst	x2, #PSR_MODE_MASK		// user mode regs?
-	b.ne	no_work_pending			// returning to kernel
 	enable_irq				// enable interrupts for do_notify_resume()
 	bl	do_notify_resume
 	b	ret_to_user
@@ -698,7 +695,6 @@ ret_to_user:
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
 	enable_step_tsk x1, x2
-no_work_pending:
 	kernel_exit 0
 ENDPROC(ret_to_user)
 

From 91a6481661c6e9d8e503540decf6382a604c7893 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 23 Dec 2015 10:29:28 +0100
Subject: [PATCH 646/797] efi: stub: define DISABLE_BRANCH_PROFILING for all
 architectures

This moves the DISABLE_BRANCH_PROFILING define from the x86 specific
to the general CFLAGS definition for the stub. This fixes build errors
when building for arm64 with CONFIG_PROFILE_ALL_BRANCHES_ENABLED.

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit b523e185bba36164ca48a190f5468c140d815414)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/firmware/efi/libstub/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 3c0467d3688c..c0ddd1b8dca3 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -8,7 +8,7 @@ cflags-$(CONFIG_X86_32)		:= -march=i386
 cflags-$(CONFIG_X86_64)		:= -mcmodel=small
 cflags-$(CONFIG_X86)		+= -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
 				   -fPIC -fno-strict-aliasing -mno-red-zone \
-				   -mno-mmx -mno-sse -DDISABLE_BRANCH_PROFILING
+				   -mno-mmx -mno-sse
 
 cflags-$(CONFIG_ARM64)		:= $(subst -pg,,$(KBUILD_CFLAGS))
 cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
@@ -16,7 +16,7 @@ cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
 
 cflags-$(CONFIG_EFI_ARMSTUB)	+= -I$(srctree)/scripts/dtc/libfdt
 
-KBUILD_CFLAGS			:= $(cflags-y) \
+KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector)
 

From 1ebc63c2d5ebd9578b625a049b6f83e9181caf8d Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 6 Jan 2016 11:05:27 +0000
Subject: [PATCH 647/797] arm64: head.S: use memset to clear BSS

Currently we use an open-coded memzero to clear the BSS. As it is a
trivial implementation, it is sub-optimal.

Our optimised memset doesn't use the stack, is position-independent, and
for the memzero case can make use of DC ZVA to clear large blocks
efficiently. In __mmap_switched the MMU is on and there are no live
caller-saved registers, so we can safely call an uninstrumented memset.

This patch changes __mmap_switched to use memset when clearing the BSS.
We use the __pi_memset alias so as to avoid any instrumentation in all
kernel configurations.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 2a803c4db615d85126c5c7afd5849a3cfde71422)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/head.S | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 17ce7285bb12..917d98108b3f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -415,14 +415,13 @@ ENDPROC(__create_page_tables)
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
-	adr_l	x6, __bss_start
-	adr_l	x7, __bss_stop
+	// Clear BSS
+	adr_l	x0, __bss_start
+	mov	x1, xzr
+	adr_l	x2, __bss_stop
+	sub	x2, x2, x0
+	bl	__pi_memset
 
-1:	cmp	x6, x7
-	b.hs	2f
-	str	xzr, [x6], #8			// Clear BSS
-	b	1b
-2:
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
 	and	x4, x4, #~(THREAD_SIZE - 1)

From a9bd748299179a8d8f8fcd937c74ab321981ab4d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 6 May 2016 12:03:29 -0400
Subject: [PATCH 648/797] Revert: "powerpc/tm: Check for already reclaimed
 tasks"

This reverts commit e924c60db1b4891e45d15a33474ac5fab62cf029 which was
commit 7f821fc9c77a9b01fe7b1d6e72717b33d8d64142 upstream.

It shouldn't have been applied as the original was already in 4.4.

Reported-by: Jiri Slaby <jslaby@suse.cz>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/process.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef2ad2d682da..646bf4d222c1 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -551,24 +551,6 @@ static void tm_reclaim_thread(struct thread_struct *thr,
 		msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
 	}
 
-	/*
-	 * Use the current MSR TM suspended bit to track if we have
-	 * checkpointed state outstanding.
-	 * On signal delivery, we'd normally reclaim the checkpointed
-	 * state to obtain stack pointer (see:get_tm_stackpointer()).
-	 * This will then directly return to userspace without going
-	 * through __switch_to(). However, if the stack frame is bad,
-	 * we need to exit this thread which calls __switch_to() which
-	 * will again attempt to reclaim the already saved tm state.
-	 * Hence we need to check that we've not already reclaimed
-	 * this state.
-	 * We do this using the current MSR, rather tracking it in
-	 * some specific thread_struct bit, as it has the additional
-	 * benifit of checking for a potential TM bad thing exception.
-	 */
-	if (!MSR_TM_SUSPENDED(mfmsr()))
-		return;
-
 	/*
 	 * Use the current MSR TM suspended bit to track if we have
 	 * checkpointed state outstanding.

From 2349384312b4192c5200d16f6089921fb4ffb7d8 Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Tue, 5 Apr 2016 10:23:48 +0530
Subject: [PATCH 649/797] RDMA/iw_cxgb4: Fix bar2 virt addr calculation for T4
 chips

commit 32cc92c7b5e52357a0a24010bae9eb257fa75d3e upstream.

For T4, kernel mode qps don't use the user doorbell. User mode qps during
flow control db ringing are forced into kernel, where user doorbell is
treated as kernel doorbell and proper bar2 offset in bar2 virtual space is
calculated, which in case of T4 is a bogus address, causing a kernel panic
due to illegal write during doorbell ringing.
In case of T4, kernel mode qp bar2 virtual address should be 0. Added T4
check during bar2 virtual address calculation to return 0. Fixed Bar2
range checks based on bar2 physical address.

The below oops will be fixed

  <1>BUG: unable to handle kernel paging request at 000000000002aa08
  <1>IP: [<ffffffffa011d800>] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4]
  <4>PGD 1416a8067 PUD 15bf35067 PMD 0
  <4>Oops: 0002 [#1] SMP
  <4>last sysfs file:
  /sys/devices/pci0000:00/0000:00:03.0/0000:02:00.4/infiniband/cxgb4_0/node_guid
  <4>CPU 5
  <4>Modules linked in: rdma_ucm rdma_cm ib_cm ib_sa ib_mad ib_uverbs
  ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE
  iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack
  ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables bridge autofs4
  target_core_iblock target_core_file target_core_pscsi target_core_mod
  configfs bnx2fc cnic uio fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q
  garp stp llc cpufreq_ondemand acpi_cpufreq freq_table mperf vhost_net macvtap
  macvlan tun kvm uinput microcode iTCO_wdt iTCO_vendor_support sg joydev
  serio_raw i2c_i801 i2c_core lpc_ich mfd_core e1000e ptp pps_core ioatdma dca
  i7core_edac edac_core shpchp ext3 jbd mbcache sd_mod crc_t10dif pata_acpi
  ata_generic ata_piix iw_cxgb4 iw_cm ib_core ib_addr cxgb4 ipv6 dm_mirror
  dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan]
  <4>
  Supermicro X8ST3/X8ST3
  <4>RIP: 0010:[<ffffffffa011d800>]  [<ffffffffa011d800>]
  c4iw_uld_control+0x4e0/0x880 [iw_cxgb4]
  <4>RSP: 0000:ffff880155a03db0  EFLAGS: 00010006
  <4>RAX: 000000000000001d RBX: ffff88013ae5fc00 RCX: ffff880155adb180
  <4>RDX: 000000000002aa00 RSI: 0000000000000001 RDI: ffff88013ae5fdf8
  <4>RBP: ffff880155a03e10 R08: 0000000000000000 R09: 0000000000000001
  <4>R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
  <4>R13: 000000000000001d R14: ffff880156414ab0 R15: ffffe8ffffc05b88
  <4>FS:  0000000000000000(0000) GS:ffff8800282a0000(0000) knlGS:0000000000000000
  <4>CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
  <4>CR2: 000000000002aa08 CR3: 000000015bd0e000 CR4: 00000000000007e0
  <4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  <4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
  <4>Process cxgb4 (pid: 394, threadinfo ffff880155a00000, task ffff880156414ab0)
  <4>Stack:
  <4> ffff880156415068 ffff880155adb180 ffff880155a03df0 ffffffffa00a344b
  <4><d> 00000000000003e8 ffff880155920000 0000000000000004 ffff880155920000
  <4><d> ffff88015592d438 ffffffffa00a3860 ffff880155a03fd8 ffffe8ffffc05b88
  <4>Call Trace:
  <4> [<ffffffffa00a344b>] ? enable_txq_db+0x2b/0x80 [cxgb4]
  <4> [<ffffffffa00a3860>] ? process_db_full+0x0/0xa0 [cxgb4]
  <4> [<ffffffffa00a38a6>] process_db_full+0x46/0xa0 [cxgb4]
  <4> [<ffffffff8109fda0>] worker_thread+0x170/0x2a0
  <4> [<ffffffff810a6aa0>] ? autoremove_wake_function+0x0/0x40
  <4> [<ffffffff8109fc30>] ? worker_thread+0x0/0x2a0
  <4> [<ffffffff810a660e>] kthread+0x9e/0xc0
  <4> [<ffffffff8100c28a>] child_rip+0xa/0x20
  <4> [<ffffffff810a6570>] ? kthread+0x0/0xc0
  <4> [<ffffffff8100c280>] ? child_rip+0x0/0x20
  <4>Code: e9 ba 00 00 00 66 0f 1f 44 00 00 44 8b 05 29 07 02 00 45 85 c0 0f 85
  71 02 00 00 8b 83 70 01 00 00 45 0f b7 ed c1 e0 0f 44 09 e8 <89> 42 08 0f ae f8
  66 c7 83 82 01 00 00 00 00 44 0f b7 ab dc 01
  <1>RIP  [<ffffffffa011d800>] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4]
  <4> RSP <ffff880155a03db0>
  <4>CR2: 000000000002aa08`

Based on original work by Bharat Potnuri <bharat@chelsio.com>

Fixes: 74217d4c6a4fb0d8 ("iw_cxgb4: support for bar2 qid densities exceeding the page size")

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Reviewed-by: Leon Romanovsky <leon@leon.nu>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/cxgb4/cq.c | 2 +-
 drivers/infiniband/hw/cxgb4/qp.c | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index de9cd6901752..bc147582bed9 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
 				      &cq->bar2_qid,
 				      user ? &cq->bar2_pa : NULL);
-	if (user && !cq->bar2_va) {
+	if (user && !cq->bar2_pa) {
 		pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
 			pci_name(rdev->lldi.pdev), cq->cqid);
 		ret = -EINVAL;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aa515afee724..53aa7515f542 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 
 	if (pbar2_pa)
 		*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
+
+	if (is_t4(rdev->lldi.adapter_type))
+		return NULL;
+
 	return rdev->bar2_kva + bar2_qoffset;
 }
 
@@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	/*
 	 * User mode must have bar2 access.
 	 */
-	if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) {
+	if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
 		pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
 		goto free_dma;

From c6a012ba56536cac022045cab504d76d342d8c91 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 Jan 2016 14:52:02 +0100
Subject: [PATCH 650/797] ipvs: handle ip_vs_fill_iph_skb_off failure

commit 3f20efba41916ee17ce82f0fdd02581ada2872b2 upstream.

ip_vs_fill_iph_skb_off() may not find an IP header, and gcc has
determined that ip_vs_sip_fill_param() then incorrectly accesses
the protocol fields:

net/netfilter/ipvs/ip_vs_pe_sip.c: In function 'ip_vs_sip_fill_param':
net/netfilter/ipvs/ip_vs_pe_sip.c:76:5: error: 'iph.protocol' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  if (iph.protocol != IPPROTO_UDP)
     ^
net/netfilter/ipvs/ip_vs_pe_sip.c:81:10: error: 'iph.len' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  dataoff = iph.len + sizeof(struct udphdr);
          ^

This adds a check for the ip_vs_fill_iph_skb_off() return code
before looking at the ip header data returned from it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: b0e010c527de ("ipvs: replace ip_vs_fill_ip4hdr with ip_vs_fill_iph_skb_off")
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_pe_sip.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e493a..c4e9ca016a88 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	const char *dptr;
 	int retc;
 
-	ip_vs_fill_iph_skb(p->af, skb, false, &iph);
+	retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
 
 	/* Only useful with UDP */
-	if (iph.protocol != IPPROTO_UDP)
+	if (!retc || iph.protocol != IPPROTO_UDP)
 		return -EINVAL;
 	/* todo: IPv6 fragments:
 	 *       I think this only should be done for the first fragment. /HS

From ba5e7e673624b6099640111e7366be850cf5dbe7 Mon Sep 17 00:00:00 2001
From: Marco Angaroni <marcoangaroni@gmail.com>
Date: Sat, 5 Mar 2016 12:10:02 +0100
Subject: [PATCH 651/797] ipvs: correct initial offset of Call-ID header search
 in SIP persistence engine

commit 7617a24f83b5d67f4dab1844956be1cebc44aec8 upstream.

The IPVS SIP persistence engine is not able to parse the SIP header
"Call-ID" when such header is inserted in the first positions of
the SIP message.

When IPVS is configured with "--pe sip" option, like for example:
ipvsadm -A -u 1.2.3.4:5060 -s rr --pe sip -p 120 -o
some particular messages (see below for details) do not create entries
in the connection template table, which can be listed with:
ipvsadm -Lcn --persistent-conn

Problematic SIP messages are SIP responses having "Call-ID" header
positioned just after message first line:
SIP/2.0 200 OK
[Call-ID header here]
[rest of the headers]

When "Call-ID" header is positioned down (after a few other headers)
it is correctly recognized.

This is due to the data offset used in get_callid function call inside
ip_vs_pe_sip.c file: since dptr already points to the start of the
SIP message, the value of dataoff should be initially 0.
Otherwise the header is searched starting from some bytes after the
first character of the SIP message.

Fixes: 758ff0338722 ("IPVS: sip persistence engine")
Signed-off-by: Marco Angaroni <marcoangaroni@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_pe_sip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index c4e9ca016a88..0a6eb5c0d9e9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 
-	if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+	if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
 		return -EINVAL;
 
 	/* N.B: pe_data is only set on success,

From f94ad404f8934e0cae299dd6520707dc02bbf2fc Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 5 Mar 2016 15:03:22 +0200
Subject: [PATCH 652/797] ipvs: drop first packet to redirect conntrack

commit f719e3754ee2f7275437e61a6afd520181fdd43b upstream.

Jiri Bohac is reporting a problem where the attempt
to reschedule existing connection to another real server
needs proper redirect for the conntrack used by the IPVS
connection. For example, when IPVS connection is created
to NAT-ed real server we alter the reply direction of
conntrack. If we later decide to select different real
server we can not alter again the conntrack. And if we
expire the old connection, the new connection is left
without conntrack.

So, the only way to redirect both the IPVS connection and
Netfilter's conntrack is to drop the SYN packet that
hits the existing connection, to wait for the next jiffy
to expire the old connection and its conntrack, and to rely
on the client's retransmission to create a new connection as
usual.

Jiri Bohac provided a fix that drops all SYNs on rescheduling,
I extended his patch to do such drops only for connections
that use conntrack. Here is the original report from Jiri Bohac:

Since commit dc7b3eb900aa ("ipvs: Fix reuse connection if real server
is dead"), new connections to dead servers are redistributed
immediately to new servers.  The old connection is expired using
ip_vs_conn_expire_now() which sets the connection timer to expire
immediately.

However, before the timer callback, ip_vs_conn_expire(), is run
to clean the connection's conntrack entry, the new redistributed
connection may already be established and its conntrack removed
instead.

Fix this by dropping the first packet of the new connection
instead, like we do when the destination server is not available.
The timer will have deleted the old conntrack entry long before
the first packet of the new connection is retransmitted.

Fixes: dc7b3eb900aa ("ipvs: Fix reuse connection if real server is dead")
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip_vs.h             | 17 +++++++++++++++
 net/netfilter/ipvs/ip_vs_core.c | 37 +++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 0816c872b689..a6cc576fd467 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1588,6 +1588,23 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 }
 #endif /* CONFIG_IP_VS_NFCT */
 
+/* Really using conntrack? */
+static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp,
+					     struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_VS_NFCT
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	if (!(cp->flags & IP_VS_CONN_F_NFCT))
+		return false;
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct && !nf_ct_is_untracked(ct))
+		return true;
+#endif
+	return false;
+}
+
 static inline int
 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
 {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f57b4dcdb233..4da560005b0e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
 	cp = pp->conn_in_get(ipvs, af, skb, &iph);
 
 	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
-	if (conn_reuse_mode && !iph.fragoffs &&
-	    is_new_conn(skb, &iph) && cp &&
-	    ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
-	      unlikely(!atomic_read(&cp->dest->weight))) ||
-	     unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
-		if (!atomic_read(&cp->n_control))
-			ip_vs_conn_expire_now(cp);
-		__ip_vs_conn_put(cp);
-		cp = NULL;
+	if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+		bool uses_ct = false, resched = false;
+
+		if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+		    unlikely(!atomic_read(&cp->dest->weight))) {
+			resched = true;
+			uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+		} else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+			uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+			if (!atomic_read(&cp->n_control)) {
+				resched = true;
+			} else {
+				/* Do not reschedule controlling connection
+				 * that uses conntrack while it is still
+				 * referenced by controlled connection(s).
+				 */
+				resched = !uses_ct;
+			}
+		}
+
+		if (resched) {
+			if (!atomic_read(&cp->n_control))
+				ip_vs_conn_expire_now(cp);
+			__ip_vs_conn_put(cp);
+			if (uses_ct)
+				return NF_DROP;
+			cp = NULL;
+		}
 	}
 
 	if (unlikely(!cp)) {

From 8cc8381397b44d8888c02ad1995496d709cd541a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 22 Jan 2016 16:48:46 +0200
Subject: [PATCH 653/797] mfd: intel-lpss: Remove clock tree on error path

commit 84cb36cac581c915ef4e8b70abb73e084325df92 upstream.

We forgot to remove the clock tree if something goes wrong in ->probe(). Add a
call to intel_lpss_unregister_clock() on error path in ->probe() to fix the
potential issue.

Fixes: 4b45efe85263 (mfd: Add support for Intel Sunrisepoint LPSS devices)
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/intel-lpss.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c
index 6255513f54c7..68aa31ae553a 100644
--- a/drivers/mfd/intel-lpss.c
+++ b/drivers/mfd/intel-lpss.c
@@ -445,6 +445,7 @@ int intel_lpss_probe(struct device *dev,
 err_remove_ltr:
 	intel_lpss_debugfs_remove(lpss);
 	intel_lpss_ltr_hide(lpss);
+	intel_lpss_unregister_clock(lpss);
 
 err_clk_register:
 	ida_simple_remove(&intel_lpss_devid_ida, lpss->devid);

From be0860081ab64f99bcdaeaaf106778b1a16a3198 Mon Sep 17 00:00:00 2001
From: Dan Streetman <dan.streetman@canonical.com>
Date: Thu, 14 Jan 2016 13:42:32 -0500
Subject: [PATCH 654/797] nbd: ratelimit error msgs after socket close

commit da6ccaaa79caca4f38b540b651238f87215217a2 upstream.

Make the "Attempted send on closed socket" error messages generated in
nbd_request_handler() ratelimited.

When the nbd socket is shutdown, the nbd_request_handler() function emits
an error message for every request remaining in its queue.  If the queue
is large, this will spam a large amount of messages to the log.  There's
no need for a separate error message for each request, so this patch
ratelimits it.

In the specific case this was found, the system was virtual and the error
messages were logged to the serial port, which overwhelmed it.

Fixes: 4d48a542b427 ("nbd: fix I/O hang on disconnected nbds")
Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 93b3f99b6865..8f1ce6d57a08 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -618,8 +618,8 @@ static void nbd_request_handler(struct request_queue *q)
 			req, req->cmd_type);
 
 		if (unlikely(!nbd->sock)) {
-			dev_err(disk_to_dev(nbd->disk),
-				"Attempted send on closed socket\n");
+			dev_err_ratelimited(disk_to_dev(nbd->disk),
+					    "Attempted send on closed socket\n");
 			req->errors++;
 			nbd_end_request(nbd, req);
 			spin_lock_irq(q->queue_lock);

From 5dd660ee0ebedf9aea7bbf7360584668af3cecce Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 10 Mar 2016 10:45:32 +0300
Subject: [PATCH 655/797] ata: ahci_xgene: dereferencing uninitialized pointer
 in probe

commit 8134233e8d346aaa1c929dc510e75482ae318bce upstream.

If the call to acpi_get_object_info() fails then "info" hasn't been
initialized.  In that situation, we already know that "version" should
be XGENE_AHCI_V1 so we don't actually need to dereference "info".

Fixes: c9802a4be661 ('ata: ahci_xgene: Add AHCI Support for 2nd HW version of APM X-Gene SoC AHCI SATA Host controller.')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci_xgene.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index e2c6d9e0c5ac..e916bff6cee8 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -739,9 +739,9 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 				dev_warn(&pdev->dev, "%s: Error reading device info. Assume version1\n",
 					__func__);
 				version = XGENE_AHCI_V1;
-			}
-			if (info->valid & ACPI_VALID_CID)
+			} else if (info->valid & ACPI_VALID_CID) {
 				version = XGENE_AHCI_V2;
+			}
 		}
 	}
 #endif

From c94897e820a85375c4cb7447ac429f6d1550b331 Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Tue, 23 Feb 2016 05:16:17 -0800
Subject: [PATCH 656/797] mwifiex: fix corner case association failure

commit a6139b6271f9f95377fe3486aed6120c9142779b upstream.

This patch corrects the error case in association path by returning
-1. Earlier "media_connected" used to remain on in this error case
causing failure for further association attempts.

Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Fixes: b887664d882ee4 ('mwifiex: channel switch handling for station')
Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/sta_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c
index a6c8a4f7bfe9..d6c4f0f60839 100644
--- a/drivers/net/wireless/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/mwifiex/sta_ioctl.c
@@ -313,6 +313,7 @@ int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss,
 			mwifiex_dbg(adapter, ERROR,
 				    "Attempt to reconnect on csa closed chan(%d)\n",
 				    bss_desc->channel);
+			ret = -1;
 			goto done;
 		}
 

From d65bf4e2407824aecba454c1b0bc7908a5113047 Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khalasa@piap.pl>
Date: Fri, 11 Mar 2016 12:32:14 +0100
Subject: [PATCH 657/797] CNS3xxx: Fix PCI cns3xxx_write_config()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 88e9da9a2a70b6f1a171fbf30a681d6bc4031c4d upstream.

The "where" offset was added twice, fix it.

Signed-off-by: Krzysztof Hałasa <khalasa@piap.pl>
Fixes: 498a92d42596 ("ARM: cns3xxx: pci: avoid potential stack overflow")
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-cns3xxx/pcie.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c
index 47905a50e075..318394ed5c7a 100644
--- a/arch/arm/mach-cns3xxx/pcie.c
+++ b/arch/arm/mach-cns3xxx/pcie.c
@@ -220,13 +220,13 @@ static void cns3xxx_write_config(struct cns3xxx_pcie *cnspci,
 	u32 mask = (0x1ull << (size * 8)) - 1;
 	int shift = (where % 4) * 8;
 
-	v = readl_relaxed(base + (where & 0xffc));
+	v = readl_relaxed(base);
 
 	v &= ~(mask << shift);
 	v |= (val & mask) << shift;
 
-	writel_relaxed(v, base + (where & 0xffc));
-	readl_relaxed(base + (where & 0xffc));
+	writel_relaxed(v, base);
+	readl_relaxed(base);
 }
 
 static void __init cns3xxx_pcie_hw_init(struct cns3xxx_pcie *cnspci)

From f0e92143b8e2e6fa1e854385667427011cfe1059 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Thu, 21 Jan 2016 21:53:09 +0100
Subject: [PATCH 658/797] clk-divider: make sure read-only dividers do not
 write to their register

commit 50359819794b4a16ae35051cd80f2dab025f6019 upstream.

Commit e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") removed
the special ops struct for read-only clocks and instead opted to handle
them inside the regular ops.

On the rk3368 this results in breakage as aclkm now gets a value written.
While it is the same divider value, the A53 core still doesn't like it,
which can result in the cpu ending up in a hang.
The reason being that "ACLKENM asserts one clock cycle before the rising
edge of ACLKM" and the clock should only be touched when STANDBYWFIL2
is asserted.

To fix this, reintroduce the read-only ops but do include the round_rate
callback. That way no writes that may be unsafe are done to the divider
register in any case.

The Rockchip use of the clk_divider_ops is adapted to this split again,
as is the nxp, lpc18xx-ccu driver that was included since the original
commit. On lpc18xx-ccu the divider seems to always be read-only
so only uses the new ops now.

Fixes: e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1")
Reported-by: Zhang Qing <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/clk-divider.c         | 11 ++++++++++-
 drivers/clk/nxp/clk-lpc18xx-ccu.c |  2 +-
 drivers/clk/rockchip/clk.c        |  4 +++-
 include/linux/clk-provider.h      |  1 +
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 3ace102a2a0a..bbf206e3da0d 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -422,6 +422,12 @@ const struct clk_ops clk_divider_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_divider_ops);
 
+const struct clk_ops clk_divider_ro_ops = {
+	.recalc_rate = clk_divider_recalc_rate,
+	.round_rate = clk_divider_round_rate,
+};
+EXPORT_SYMBOL_GPL(clk_divider_ro_ops);
+
 static struct clk *_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
@@ -445,7 +451,10 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
-	init.ops = &clk_divider_ops;
+	if (clk_divider_flags & CLK_DIVIDER_READ_ONLY)
+		init.ops = &clk_divider_ro_ops;
+	else
+		init.ops = &clk_divider_ops;
 	init.flags = flags | CLK_IS_BASIC;
 	init.parent_names = (parent_name ? &parent_name: NULL);
 	init.num_parents = (parent_name ? 1 : 0);
diff --git a/drivers/clk/nxp/clk-lpc18xx-ccu.c b/drivers/clk/nxp/clk-lpc18xx-ccu.c
index 13aabbb3acbe..558da89555af 100644
--- a/drivers/clk/nxp/clk-lpc18xx-ccu.c
+++ b/drivers/clk/nxp/clk-lpc18xx-ccu.c
@@ -222,7 +222,7 @@ static void lpc18xx_ccu_register_branch_gate_div(struct lpc18xx_clk_branch *bran
 		div->width = 1;
 
 		div_hw = &div->hw;
-		div_ops = &clk_divider_ops;
+		div_ops = &clk_divider_ro_ops;
 	}
 
 	branch->gate.reg = branch->offset + reg_base;
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index be6c7fd8315d..e37eee819df9 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -90,7 +90,9 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 		div->width = div_width;
 		div->lock = lock;
 		div->table = div_table;
-		div_ops = &clk_divider_ops;
+		div_ops = (div_flags & CLK_DIVIDER_READ_ONLY)
+						? &clk_divider_ro_ops
+						: &clk_divider_ops;
 	}
 
 	clk = clk_register_composite(NULL, name, parent_names, num_parents,
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index c56988ac63f7..7cd0171963ae 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -384,6 +384,7 @@ struct clk_divider {
 #define CLK_DIVIDER_MAX_AT_ZERO		BIT(6)
 
 extern const struct clk_ops clk_divider_ops;
+extern const struct clk_ops clk_divider_ro_ops;
 
 unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate,
 		unsigned int val, const struct clk_div_table *table,

From e6ce6ce062650eda95bc2b44420a4a7151c42d8a Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Mon, 1 Feb 2016 16:18:40 +0800
Subject: [PATCH 659/797] soc: rockchip: power-domain: fix err handle while
 probing

commit 1d961f11a108af9f7fbe89cc950a8d16ddbdbb28 upstream.

If we fail to probe the driver, we should not directly break
from the for_each_available_child_of_node since it calls of_node_get
while iterating. This patch adds of_node_put to fix the unbalanced
call pair.

Fixes: 7c696693a4f5 ("soc: rockchip: power-domain: Add power domain driver")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/soc/rockchip/pm_domains.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 534c58937a56..4a65c5bda146 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -419,6 +419,7 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
 		if (error) {
 			dev_err(dev, "failed to handle node %s: %d\n",
 				node->name, error);
+			of_node_put(node);
 			goto err_out;
 		}
 	}

From c7ea1f7642d7d77503804086af9b2336621b31e8 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Tue, 2 Feb 2016 11:37:50 +0800
Subject: [PATCH 660/797] clk: rockchip: free memory in error cases when
 registering clock branches

commit 2467b6745e0ae9c6cdccff24c4cceeb14b1cce3f upstream.

Free the allocated memory if rockchip_clk_register_branch fails.

Fixes: a245fecbb806 ("clk: rockchip: add basic infrastructure...")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index e37eee819df9..9b6c8188efac 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -70,7 +70,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	if (gate_offset >= 0) {
 		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
 		if (!gate)
-			return ERR_PTR(-ENOMEM);
+			goto err_gate;
 
 		gate->flags = gate_flags;
 		gate->reg = base + gate_offset;
@@ -82,7 +82,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	if (div_width > 0) {
 		div = kzalloc(sizeof(*div), GFP_KERNEL);
 		if (!div)
-			return ERR_PTR(-ENOMEM);
+			goto err_div;
 
 		div->flags = div_flags;
 		div->reg = base + muxdiv_offset;
@@ -102,6 +102,11 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 				     flags);
 
 	return clk;
+err_div:
+	kfree(gate);
+err_gate:
+	kfree(mux);
+	return ERR_PTR(-ENOMEM);
 }
 
 static struct clk *rockchip_clk_register_frac_branch(const char *name,

From cf5281ef53856c5c9e4e9b0728805416502e466a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?= <afaerber@suse.de>
Date: Sun, 7 Feb 2016 22:13:03 +0100
Subject: [PATCH 661/797] clk: meson: Fix meson_clk_register_clks() signature
 type mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit bb473593c8099302bfd7befc23de67df907e3a99 upstream.

As preparation for arm64 based mesongxbb, which pulls in this code once
enabling ARCH_MESON, fix a size_t vs. unsigned int type mismatch.
The loop uses a local unsigned int variable, so adopt that type,
matching the header.

Fixes: 7a29a869434e ("clk: meson: Add support for Meson clock controller")
Signed-off-by: Andreas Färber <afaerber@suse.de>
Acked-by: Carlo Caione <carlo@endlessm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/meson/clkc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/meson/clkc.c b/drivers/clk/meson/clkc.c
index c83ae1367abc..d920d410b51d 100644
--- a/drivers/clk/meson/clkc.c
+++ b/drivers/clk/meson/clkc.c
@@ -198,7 +198,7 @@ meson_clk_register_fixed_rate(const struct clk_conf *clk_conf,
 }
 
 void __init meson_clk_register_clks(const struct clk_conf *clk_confs,
-				    size_t nr_confs,
+				    unsigned int nr_confs,
 				    void __iomem *clk_base)
 {
 	unsigned int i;

From faaf496612c39c8ca6d46fec5a6af78b85689f65 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 22 Feb 2016 11:43:39 +0000
Subject: [PATCH 662/797] clk: qcom: msm8960: fix ce3_core clk enable register

commit 732d6913691848db9fabaa6a25b4d6fad10ddccf upstream.

This patch corrects the enable register offset which is actually 0x36cc
instead of 0x36c4

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Fixes: 5f775498bdc4 ("clk: qcom: Fully support apq8064 global clock control")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/qcom/gcc-msm8960.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index 66c18bc97857..2c83c03309cb 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c
@@ -2769,7 +2769,7 @@ static struct clk_branch ce3_core_clk = {
 	.halt_reg = 0x2fdc,
 	.halt_bit = 5,
 	.clkr = {
-		.enable_reg = 0x36c4,
+		.enable_reg = 0x36cc,
 		.enable_mask = BIT(4),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_core_clk",

From 5a9a5671011a3732daf1d300d5205aaae82ee558 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 24 Feb 2016 09:39:11 +0100
Subject: [PATCH 663/797] clk: versatile: sp810: support reentrance

commit ec7957a6aa0aaf981fb8356dc47a2cdd01cde03c upstream.

Despite care taken to allocate clock state containers, the
SP810 driver actually just supports creating one instance:
all clocks registered for every instance will end up with the
exact same name and __clk_init() will fail.

Rename the timerclken<0> .. timerclken<n> to sp810_<instance>_<n>
so every clock on every instance gets a unique name.

This is necessary for the RealView PBA8 which has two SP810
blocks: the second block will not register its clocks unless
every clock on every instance is unique and results in boot
logs like this:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 0 at ../drivers/clk/versatile/clk-sp810.c:137
  clk_sp810_of_setup+0x110/0x154()
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted
4.5.0-rc2-00030-g352718fc39f6-dirty #225
Hardware name: ARM RealView Machine (Device Tree Support)
[<c00167f8>] (unwind_backtrace) from [<c0013204>]
             (show_stack+0x10/0x14)
[<c0013204>] (show_stack) from [<c01a049c>]
             (dump_stack+0x84/0x9c)
[<c01a049c>] (dump_stack) from [<c0024990>]
             (warn_slowpath_common+0x74/0xb0)
[<c0024990>] (warn_slowpath_common) from [<c0024a68>]
             (warn_slowpath_null+0x1c/0x24)
[<c0024a68>] (warn_slowpath_null) from [<c051eb44>]
             (clk_sp810_of_setup+0x110/0x154)
[<c051eb44>] (clk_sp810_of_setup) from [<c051e3a4>]
             (of_clk_init+0x12c/0x1c8)
[<c051e3a4>] (of_clk_init) from [<c0504714>]
             (time_init+0x20/0x2c)
[<c0504714>] (time_init) from [<c0501b18>]
             (start_kernel+0x244/0x3c4)
[<c0501b18>] (start_kernel) from [<7000807c>] (0x7000807c)
---[ end trace cb88537fdc8fa200 ]---

Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Pawel Moll <pawel.moll@arm.com>
Fixes: 6e973d2c4385 "clk: vexpress: Add separate SP810 driver"
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/versatile/clk-sp810.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c
index a1cdef6b0f90..897c36c1754a 100644
--- a/drivers/clk/versatile/clk-sp810.c
+++ b/drivers/clk/versatile/clk-sp810.c
@@ -92,6 +92,7 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	int num = ARRAY_SIZE(parent_names);
 	char name[12];
 	struct clk_init_data init;
+	static int instance;
 	int i;
 	bool deprecated;
 
@@ -118,7 +119,7 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	deprecated = !of_find_property(node, "assigned-clock-parents", NULL);
 
 	for (i = 0; i < ARRAY_SIZE(sp810->timerclken); i++) {
-		snprintf(name, ARRAY_SIZE(name), "timerclken%d", i);
+		snprintf(name, sizeof(name), "sp810_%d_%d", instance, i);
 
 		sp810->timerclken[i].sp810 = sp810;
 		sp810->timerclken[i].channel = i;
@@ -139,5 +140,6 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	}
 
 	of_clk_add_provider(node, clk_sp810_timerclken_of_get, sp810);
+	instance++;
 }
 CLK_OF_DECLARE(sp810, "arm,sp810", clk_sp810_of_setup);

From 0d50da4683464e150961142341c69ea5a578974a Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 1 Mar 2016 17:26:48 -0800
Subject: [PATCH 664/797] clk: qcom: msm8960: Fix ce3_src register offset

commit 0f75e1a370fd843c9e508fc1ccf0662833034827 upstream.

The offset seems to have been copied from the sata clk. Fix it so
that enabling the crypto engine source clk works.

Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Fixes: 5f775498bdc4 ("clk: qcom: Fully support apq8064 global clock control")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/qcom/gcc-msm8960.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index 2c83c03309cb..bdc4b2d07a23 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c
@@ -2753,7 +2753,7 @@ static struct clk_rcg ce3_src = {
 	},
 	.freq_tbl = clk_tbl_ce3,
 	.clkr = {
-		.enable_reg = 0x2c08,
+		.enable_reg = 0x36c0,
 		.enable_mask = BIT(7),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_src",

From 8e9a156140f9fb568ffcbdaaf390862bbfb09d83 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:29:44 +0100
Subject: [PATCH 665/797] lpfc: fix misleading indentation

commit aeb6641f8ebdd61939f462a8255b316f9bfab707 upstream.

gcc-6 complains about the indentation of the lpfc_destroy_vport_work_array()
call in lpfc_online(), which clearly doesn't look right:

drivers/scsi/lpfc/lpfc_init.c: In function 'lpfc_online':
drivers/scsi/lpfc/lpfc_init.c:2880:3: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation]
   lpfc_destroy_vport_work_array(phba, vports);
   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/scsi/lpfc/lpfc_init.c:2863:2: note: ...this 'if' clause, but it is not
  if (vports != NULL)
  ^~

Looking at the patch that introduced this code, it's clear that the
behavior is correct and the indentation is wrong.

This fixes the indentation and adds curly braces around the previous
if() block for clarity, as that is most likely what caused the code
to be misindented in the first place.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 549e55cd2a1b ("[SCSI] lpfc 8.2.2 : Fix locking around HBA's port_list")
Reviewed-by: Sebastian Herbszt <herbszt@gmx.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/lpfc/lpfc_init.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index db9446c612da..b0d92b84bcdc 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -2855,7 +2855,7 @@ lpfc_online(struct lpfc_hba *phba)
 	}
 
 	vports = lpfc_create_vport_work_array(phba);
-	if (vports != NULL)
+	if (vports != NULL) {
 		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 			shost = lpfc_shost_from_vport(vports[i]);
@@ -2872,7 +2872,8 @@ lpfc_online(struct lpfc_hba *phba)
 			}
 			spin_unlock_irq(shost->host_lock);
 		}
-		lpfc_destroy_vport_work_array(phba, vports);
+	}
+	lpfc_destroy_vport_work_array(phba, vports);
 
 	lpfc_unblock_mgmt_io(phba);
 	return 0;

From ee22885fc1fb49f98bb59e0825cc4ef3b1004f00 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <linux@rempel-privat.de>
Date: Tue, 12 Apr 2016 19:37:44 +0200
Subject: [PATCH 666/797] ath9k: ar5008_hw_cmn_spur_mitigate: add missing
 mask_m & mask_p initialisation

commit de478a61389cacafe94dc8b035081b681b878f9d upstream.

When moving common code to ar5008_hw_cmn_spur_mitigate, I forgot to move
the mask_m & mask_p initialisation. This caused a performance regression
on ar9281.

Fixes: f911085ffa88 ("ath9k: split ar5008_hw_spur_mitigate and reuse common code in ar9002_hw_spur_mitigate.")
Reported-by: Gustav Frederiksen <lkml2017@openmailbox.org>
Tested-by: Gustav Frederiksen <lkml2017@openmailbox.org>
Signed-off-by: Oleksij Rempel <linux@rempel-privat.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath9k/ar5008_phy.c | 8 +++-----
 drivers/net/wireless/ath/ath9k/ar9002_phy.c | 5 -----
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
index 8f8793004b9f..1b271b99c49e 100644
--- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
@@ -274,6 +274,9 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah,
 	};
 	static const int inc[4] = { 0, 100, 0, 0 };
 
+	memset(&mask_m, 0, sizeof(int8_t) * 123);
+	memset(&mask_p, 0, sizeof(int8_t) * 123);
+
 	cur_bin = -6000;
 	upper = bin + 100;
 	lower = bin - 100;
@@ -424,14 +427,9 @@ static void ar5008_hw_spur_mitigate(struct ath_hw *ah,
 	int tmp, new;
 	int i;
 
-	int8_t mask_m[123];
-	int8_t mask_p[123];
 	int cur_bb_spur;
 	bool is2GHz = IS_CHAN_2GHZ(chan);
 
-	memset(&mask_m, 0, sizeof(int8_t) * 123);
-	memset(&mask_p, 0, sizeof(int8_t) * 123);
-
 	for (i = 0; i < AR_EEPROM_MODAL_SPURS; i++) {
 		cur_bb_spur = ah->eep_ops->get_spur_channel(ah, i, is2GHz);
 		if (AR_NO_SPUR == cur_bb_spur)
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c
index db6624527d99..53d7445a5d12 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c
@@ -178,14 +178,9 @@ static void ar9002_hw_spur_mitigate(struct ath_hw *ah,
 	int i;
 	struct chan_centers centers;
 
-	int8_t mask_m[123];
-	int8_t mask_p[123];
 	int cur_bb_spur;
 	bool is2GHz = IS_CHAN_2GHZ(chan);
 
-	memset(&mask_m, 0, sizeof(int8_t) * 123);
-	memset(&mask_p, 0, sizeof(int8_t) * 123);
-
 	ath9k_hw_get_channel_centers(ah, chan, &centers);
 	freq = centers.synth_center;
 

From fe9295e05bf878652e8d0e5caef53516d8de1789 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 Apr 2016 13:47:08 +0200
Subject: [PATCH 667/797] mac80211: fix statistics leak if dev_alloc_name()
 fails

commit e6436be21e77e3659b4ff7e357ab5a8342d132d2 upstream.

In the case that dev_alloc_name() fails, e.g. because the name was
given by the user and already exists, we need to clean up properly
and free the per-CPU statistics. Fix that.

Fixes: 5a490510ba5f ("mac80211: use per-CPU TX/RX statistics")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/iface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7a2b7915093b..bcb0a1b64556 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1750,7 +1750,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = dev_alloc_name(ndev, ndev->name);
 		if (ret < 0) {
-			free_netdev(ndev);
+			ieee80211_if_free(ndev);
 			return ret;
 		}
 
@@ -1836,7 +1836,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = register_netdevice(ndev);
 		if (ret) {
-			free_netdev(ndev);
+			ieee80211_if_free(ndev);
 			return ret;
 		}
 	}

From 8d2923930be15a5b295ace2029c76653dc4def13 Mon Sep 17 00:00:00 2001
From: Chunyu Hu <chuhu@redhat.com>
Date: Tue, 3 May 2016 19:34:34 +0800
Subject: [PATCH 668/797] tracing: Don't display trigger file for events that
 can't be enabled

commit 854145e0a8e9a05f7366d240e2f99d9c1ca6d6dd upstream.

Currently register functions for events will be called
through the 'reg' field of event class directly without
any check when setting up triggers.

Triggers for events that don't support register through
debug fs (events under events/ftrace are for trace-cmd to
read event format, and most of them don't have a register
function except events/ftrace/functionx) can't be enabled
at all, and an oops will be hit when setting up trigger
for those events, so just not creating them is an easy way
to avoid the oops.

Link: http://lkml.kernel.org/r/1462275274-3911-1-git-send-email-chuhu@redhat.com

Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework")
Signed-off-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d202d991edae..996f0fd34312 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2107,8 +2107,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
 	trace_create_file("filter", 0644, file->dir, file,
 			  &ftrace_event_filter_fops);
 
-	trace_create_file("trigger", 0644, file->dir, file,
-			  &event_trigger_fops);
+	/*
+	 * Only event directories that can be enabled should have
+	 * triggers.
+	 */
+	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+		trace_create_file("trigger", 0644, file->dir, file,
+				  &event_trigger_fops);
 
 	trace_create_file("format", 0444, file->dir, call,
 			  &ftrace_event_format_fops);

From f3b51a03bea6dc4cda740481d48b2ff49abdced5 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Mon, 25 Apr 2016 16:52:38 -0700
Subject: [PATCH 669/797] MD: make bio mergeable

commit 9c573de3283af007ea11c17bde1e4568d9417328 upstream.

blk_queue_split marks bio unmergeable, which makes sense for normal bio.
But if dispatching the bio to underlayer disk, the blk_queue_split
checks are invalid, hence it's possible the bio becomes mergeable.

In the reported bug, this causes trim performance against raid0 to drop sharply:
https://bugzilla.kernel.org/show_bug.cgi?id=117051

Reported-and-tested-by: Park Ju Hyung <qkrwngud825@gmail.com>
Fixes: 6ac45aeb6bca(block: avoid to merge splitted bio)
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Neil Brown <neilb@suse.de>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b1e1f6b95782..c57fdf847b47 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -293,6 +293,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	 * go away inside make_request
 	 */
 	sectors = bio_sectors(bio);
+	/* bio could be mergeable after passing to underlayer */
+	bio->bi_rw &= ~REQ_NOMERGE;
 	mddev->pers->make_request(mddev, bio);
 
 	cpu = part_stat_lock();

From fe21a25e8c0cc97a080cc73c135e92ddce61a660 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 2 May 2016 12:46:42 -0700
Subject: [PATCH 670/797] Minimal fix-up of bad hashing behavior of hash_64()

commit 689de1d6ca95b3b5bd8ee446863bf81a4883ea25 upstream.

This is a fairly minimal fixup to the horribly bad behavior of hash_64()
with certain input patterns.

In particular, because the multiplicative value used for the 64-bit hash
was intentionally bit-sparse (so that the multiply could be done with
shifts and adds on architectures without hardware multipliers), some
bits did not get spread out very much.  In particular, certain fairly
common bit ranges in the input (roughly bits 12-20: commonly with the
most information in them when you hash things like byte offsets in files
or memory that have block factors that mean that the low bits are often
zero) would not necessarily show up much in the result.

There's a bigger patch-series brewing to fix up things more completely,
but this is the fairly minimal fix for the 64-bit hashing problem.  It
simply picks a much better constant multiplier, spreading the bits out a
lot better.

NOTE! For 32-bit architectures, the bad old hash_64() remains the same
for now, since 64-bit multiplies are expensive.  The bigger hashing
cleanup will replace the 32-bit case with something better.

The new constants were picked by George Spelvin who wrote that bigger
cleanup series.  I just picked out the constants and part of the comment
from that series.

Cc: George Spelvin <linux@horizon.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hash.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/linux/hash.h b/include/linux/hash.h
index 1afde47e1528..79c52fa81cac 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -32,12 +32,28 @@
 #error Wordsize not 32 or 64
 #endif
 
+/*
+ * The above primes are actively bad for hashing, since they are
+ * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
+ * real problems. Besides, the "prime" part is pointless for the
+ * multiplicative hash.
+ *
+ * Although a random odd number will do, it turns out that the golden
+ * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
+ * properties.
+ *
+ * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
+ * (See Knuth vol 3, section 6.4, exercise 9.)
+ */
+#define GOLDEN_RATIO_32 0x61C88647
+#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+
 static __always_inline u64 hash_64(u64 val, unsigned int bits)
 {
 	u64 hash = val;
 
-#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
-	hash = hash * GOLDEN_RATIO_PRIME_64;
+#if BITS_PER_LONG == 64
+	hash = hash * GOLDEN_RATIO_64;
 #else
 	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
 	u64 n = hash;

From d27e2ddc40b632db8f84c4d3236e8191d0eabc69 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 5 May 2016 16:22:15 -0700
Subject: [PATCH 671/797] mm, cma: prevent nr_isolated_* counters from going
 negative

commit 14af4a5e9b26ad251f81c174e8a43f3e179434a5 upstream.

/proc/sys/vm/stat_refresh warns nr_isolated_anon and nr_isolated_file go
increasingly negative under compaction: which would add delay when
should be none, or no delay when should delay.  The bug in compaction
was due to a recent mmotm patch, but much older instance of the bug was
also noticed in isolate_migratepages_range() which is used for CMA and
gigantic hugepage allocations.

The bug is caused by putback_movable_pages() in an error path
decrementing the isolated counters without them being previously
incremented by acct_isolated().  Fix isolate_migratepages_range() by
removing the error-path putback, thus reaching acct_isolated() with
migratepages still isolated, and leaving putback to caller like most
other places do.

Fixes: edc2ca612496 ("mm, compaction: move pageblock checks up from isolate_migratepages_range()")
[vbabka@suse.cz: expanded the changelog]
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/compaction.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index de3e1e71cd9f..7881e072dc33 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -880,16 +880,8 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
 		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
 							ISOLATE_UNEVICTABLE);
 
-		/*
-		 * In case of fatal failure, release everything that might
-		 * have been isolated in the previous iteration, and signal
-		 * the failure back to caller.
-		 */
-		if (!pfn) {
-			putback_movable_pages(&cc->migratepages);
-			cc->nr_migratepages = 0;
+		if (!pfn)
 			break;
-		}
 
 		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
 			break;

From 851375cc493de34a1443d85c46b026d8aeda715a Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 5 May 2016 16:22:23 -0700
Subject: [PATCH 672/797] mm/zswap: provide unique zpool name

commit 32a4e169039927bfb6ee9f0ccbbe3a8aaf13a4bc upstream.

Instead of using "zswap" as the name for all zpools created, add an
atomic counter and use "zswap%x" with the counter number for each zpool
created, to provide a unique name for each new zpool.

As zsmalloc, one of the zpool implementations, requires/expects a unique
name for each pool created, zswap should provide a unique name.  The
zsmalloc pool creation does not fail if a new pool with a conflicting
name is created, unless CONFIG_ZSMALLOC_STAT is enabled; in that case,
zsmalloc pool creation fails with -ENOMEM.  Then zswap will be unable to
change its compressor parameter if its zpool is zsmalloc; it also will
be unable to change its zpool parameter back to zsmalloc, if it has any
existing old zpool using zsmalloc with page(s) in it.  Attempts to
change the parameters will result in failure to create the zpool.  This
changes zswap to provide a unique name for each zpool creation.

Fixes: f1c54846ee45 ("zswap: dynamic pool creation")
Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Dan Streetman <dan.streetman@canonical.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/zswap.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index bf14508afd64..340261946fda 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -170,6 +170,8 @@ static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
 static LIST_HEAD(zswap_pools);
 /* protects zswap_pools list modification */
 static DEFINE_SPINLOCK(zswap_pools_lock);
+/* pool counter to provide unique names to zpool */
+static atomic_t zswap_pools_count = ATOMIC_INIT(0);
 
 /* used by param callback function */
 static bool zswap_init_started;
@@ -565,6 +567,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
 	struct zswap_pool *pool;
+	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
@@ -573,7 +576,10 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 		return NULL;
 	}
 
-	pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
+	/* unique name for each pool specifically required by zsmalloc */
+	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
+
+	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
 	if (!pool->zpool) {
 		pr_err("%s zpool not available\n", type);
 		goto error;

From 63e9a60f4357e700a181980d424ffeaff32d5340 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Fri, 22 Apr 2016 09:26:52 +0200
Subject: [PATCH 673/797] ARM: EXYNOS: Properly skip unitialized parent clock
 in power domain on

commit a0a966b83873f33778710a4fc59240244b0734a5 upstream.

We want to skip reparenting a clock on turning on power domain, if we
do not have the parent yet. The parent is obtained when turning the
domain off. However due to a typo, the loop is continued on IS_ERR() of
clock being reparented, not on the IS_ERR() of the parent.

Theoretically this could lead to OOPS on first turn on of a power
domain, if there was no turn off before. Practically that should never
happen because all power domains are turned on by default (reset value,
bootloader does not turn off them usually) so the first action will be
always turn off.

Fixes: 29e5eea06bc1 ("ARM: EXYNOS: Get current parent clock for power domain on/off")
Reported-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-exynos/pm_domains.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 7c21760f590f..875a2bab64f6 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -92,7 +92,7 @@ static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on)
 			if (IS_ERR(pd->clk[i]))
 				break;
 
-			if (IS_ERR(pd->clk[i]))
+			if (IS_ERR(pd->pclk[i]))
 				continue; /* Skip on first power up */
 			if (clk_set_parent(pd->clk[i], pd->pclk[i]))
 				pr_err("%s: error setting parent to clock%d\n",

From dd86efc570e528de7931f9eb039fa0c91bb3c1bd Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 20 Apr 2016 13:34:31 +0000
Subject: [PATCH 674/797] ARM: SoCFPGA: Fix secondary CPU startup in thumb2
 kernel

commit 5616f36713ea77f57ae908bf2fef641364403c9f upstream.

The secondary CPU starts up in ARM mode. When the kernel is compiled in
thumb2 mode we have to explicitly compile the secondary startup
trampoline in ARM mode, otherwise the CPU will go to Nirvana.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reported-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-socfpga/headsmp.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S
index 5d94b7a2fb10..c160fa3007e9 100644
--- a/arch/arm/mach-socfpga/headsmp.S
+++ b/arch/arm/mach-socfpga/headsmp.S
@@ -13,6 +13,7 @@
 #include <asm/assembler.h>
 
 	.arch	armv7-a
+	.arm
 
 ENTRY(secondary_trampoline)
 	/* CPU1 will always fetch from 0x0 when it is brought out of reset.

From 0f7ea0699ac02fb7c5d67e8eac8f8581912f4988 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 17 Mar 2016 16:51:59 +0000
Subject: [PATCH 675/797] xen: Fix page <-> pfn conversion on 32 bit systems

commit 60901df3aed230d4565dca003f11b6a95fbf30d9 upstream.

Commit 1084b1988d22dc165c9dbbc2b0e057f9248ac4db (xen: Add Xen specific
page definition) caused a regression in 4.4.

The xen functions to convert between pages and pfns fail due to an
overflow on systems where a physical address may not fit in an
unsigned long (e.g. x86 32 bit PAE systems). Rework the conversion to
avoid overflow. This should also result in simpler object code.

This bug manifested itself as disk corruption with Linux 4.4 when
using blkfront in a Xen HVM x86 32 bit guest with more than 4 GiB of
memory.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/xen/page.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/xen/page.h b/include/xen/page.h
index 96294ac93755..9dc46cb8a0fd 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -15,9 +15,9 @@
  */
 
 #define xen_pfn_to_page(xen_pfn)	\
-	((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT)))
+	(pfn_to_page((unsigned long)(xen_pfn) >> (PAGE_SHIFT - XEN_PAGE_SHIFT)))
 #define page_to_xen_pfn(page)		\
-	(((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT)
+	((page_to_pfn(page)) << (PAGE_SHIFT - XEN_PAGE_SHIFT))
 
 #define XEN_PFN_PER_PAGE	(PAGE_SIZE / XEN_PAGE_SIZE)
 

From 58d378b24b7f615b779ec20e7b7eebed8a3d8011 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 17 Mar 2016 16:52:00 +0000
Subject: [PATCH 676/797] xen/balloon: Fix crash when ballooning on x86 32 bit
 PAE

commit dfd74a1edfaba5864276a2859190a8d242d18952 upstream.

Commit 55b3da98a40dbb3776f7454daf0d95dde25c33d2 (xen/balloon: find
non-conflicting regions to place hotplugged memory) caused a
regression in 4.4.

When ballooning on an x86 32 bit PAE system with close to 64 GiB of
memory, the address returned by allocate_resource may be above 64 GiB.
When using CONFIG_SPARSEMEM, this setup is limited to using physical
addresses < 64 GiB.  When adding memory at this address, it runs off
the end of the mem_section array and causes a crash.  Instead, fail
the ballooning request.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/balloon.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 12eab503efd1..364bc44610c1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -152,6 +152,8 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
+static void release_memory_resource(struct resource *resource);
+
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -268,6 +270,20 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 		return NULL;
 	}
 
+#ifdef CONFIG_SPARSEMEM
+	{
+		unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
+		unsigned long pfn = res->start >> PAGE_SHIFT;
+
+		if (pfn > limit) {
+			pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
+			       pfn, limit);
+			release_memory_resource(res);
+			return NULL;
+		}
+	}
+#endif
+
 	return res;
 }
 

From 11dc8042c691244a085c16396fce4a3a3e9bc186 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Wed, 4 May 2016 07:02:36 -0600
Subject: [PATCH 677/797] xen/evtchn: fix ring resize when binding new events

commit 27e0e6385377c4dc68a4ddaf1a35a2dfa951f3c5 upstream.

The copying of ring data was wrong for two cases: For a full ring
nothing got copied at all (as in that case the canonicalized producer
and consumer indexes are identical). And in case one or both of the
canonicalized (after the resize) indexes would point into the second
half of the buffer, the copied data ended up in the wrong (free) part
of the new buffer. In both cases uninitialized data would get passed
back to the caller.

Fix this by simply copying the old ring contents twice: Once to the
low half of the new buffer, and a second time to the high half.

This addresses the inability to boot a HVM guest with 64 or more
vCPUs.  This regression was caused by 8620015499101090 (xen/evtchn:
dynamically grow pending event channel ring).

Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/evtchn.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 38272ad24551..f4edd6df3df2 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -316,7 +316,6 @@ static int evtchn_resize_ring(struct per_user_data *u)
 {
 	unsigned int new_size;
 	evtchn_port_t *new_ring, *old_ring;
-	unsigned int p, c;
 
 	/*
 	 * Ensure the ring is large enough to capture all possible
@@ -346,20 +345,17 @@ static int evtchn_resize_ring(struct per_user_data *u)
 	/*
 	 * Copy the old ring contents to the new ring.
 	 *
-	 * If the ring contents crosses the end of the current ring,
-	 * it needs to be copied in two chunks.
+	 * To take care of wrapping, a full ring, and the new index
+	 * pointing into the second half, simply copy the old contents
+	 * twice.
 	 *
 	 * +---------+    +------------------+
-	 * |34567  12| -> |       1234567    |
-	 * +-----p-c-+    +------------------+
+	 * |34567  12| -> |34567  1234567  12|
+	 * +-----p-c-+    +-------c------p---+
 	 */
-	p = evtchn_ring_offset(u, u->ring_prod);
-	c = evtchn_ring_offset(u, u->ring_cons);
-	if (p < c) {
-		memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
-		memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
-	} else
-		memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
+	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
+	memcpy(new_ring + u->ring_size, old_ring,
+	       u->ring_size * sizeof(*u->ring));
 
 	u->ring = new_ring;
 	u->ring_size = new_size;

From bba1e81824e04c76c14fe614b3d440b1a4d65db9 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Tue, 12 Apr 2016 13:37:45 -0700
Subject: [PATCH 678/797] HID: wacom: Add support for DTK-1651

commit e1123fe975852cc0970b4e53ea65ca917e54c923 upstream.

DTK-1651 is a display pen-only tablet

Signed-off-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/wacom_wac.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 3c0f47ac8e53..5c02d7bbc7f2 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -3449,6 +3449,10 @@ static const struct wacom_features wacom_features_0x33E =
 	{ "Wacom Intuos PT M 2", 21600, 13500, 2047, 63,
 	  INTUOSHT2, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 16,
 	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
+static const struct wacom_features wacom_features_0x343 =
+	{ "Wacom DTK1651", 34616, 19559, 1023, 0,
+	  DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4,
+	  WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
 
 static const struct wacom_features wacom_features_HID_ANY_ID =
 	{ "Wacom HID", .type = HID_GENERIC };
@@ -3614,6 +3618,7 @@ const struct hid_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x33C) },
 	{ USB_DEVICE_WACOM(0x33D) },
 	{ USB_DEVICE_WACOM(0x33E) },
+	{ USB_DEVICE_WACOM(0x343) },
 	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x4004) },
 	{ USB_DEVICE_WACOM(0x5000) },

From 5844e4cdacc5e002dfceb2872352af20cff40742 Mon Sep 17 00:00:00 2001
From: Nazar Mokrynskyi <nazar@mokrynskyi.com>
Date: Mon, 25 Apr 2016 17:01:56 +0300
Subject: [PATCH 679/797] HID: Fix boot delay for Creative SB Omni Surround 5.1
 with quirk

commit 567a44ecb44eb2584ddb93e962cfb133ce77e0bb upstream.

Needed for v2 of the device firmware; otherwise the kernel will be stuck for
a few seconds and throw "usb_submit_urb(ctrl) failed: -1" early on system boot.

Signed-off-by: Nazar Mokrynskyi <nazar@mokrynskyi.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-ids.h           | 1 +
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 8b78a7f1f779..909ab0176ef2 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -255,6 +255,7 @@
 #define USB_DEVICE_ID_CORSAIR_K90	0x1b02
 
 #define USB_VENDOR_ID_CREATIVELABS	0x041e
+#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51	0x322c
 #define USB_DEVICE_ID_PRODIKEYS_PCMIDI	0x2801
 
 #define USB_VENDOR_ID_CVTOUCH		0x1ff7
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 7dd0953cd70f..dc8e6adf95a4 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -70,6 +70,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL },

From 5fd407ad088227ee030e93246cffe757541483f0 Mon Sep 17 00:00:00 2001
From: Knut Wohlrab <Knut.Wohlrab@de.bosch.com>
Date: Mon, 25 Apr 2016 14:08:25 -0700
Subject: [PATCH 680/797] Input: zforce_ts - fix dual touch recognition

commit 6984ab1ab35f422292b7781c65284038bcc0f6a6 upstream.

A wrong decoding of the touch coordinate message causes a wrong touch
ID. Touch ID for dual touch must be 0 or 1.

According to the actual Neonode nine byte touch coordinate coding,
the state is transported in the lower nibble and the touch ID in
the higher nibble of payload byte five.

Signed-off-by: Knut Wohlrab <Knut.Wohlrab@de.bosch.com>
Signed-off-by: Oleksij Rempel <linux@rempel-privat.de>
Signed-off-by: Dirk Behme <dirk.behme@de.bosch.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/touchscreen/zforce_ts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c
index 9bbadaaf6bc3..7b3845aa5983 100644
--- a/drivers/input/touchscreen/zforce_ts.c
+++ b/drivers/input/touchscreen/zforce_ts.c
@@ -370,8 +370,8 @@ static int zforce_touch_event(struct zforce_ts *ts, u8 *payload)
 			point.coord_x = point.coord_y = 0;
 		}
 
-		point.state = payload[9 * i + 5] & 0x03;
-		point.id = (payload[9 * i + 5] & 0xfc) >> 2;
+		point.state = payload[9 * i + 5] & 0x0f;
+		point.id = (payload[9 * i + 5] & 0xf0) >> 4;
 
 		/* determine touch major, minor and orientation */
 		point.area_major = max(payload[9 * i + 6],

From 898149d10b855a0d0a5a9f8f05e4359970919eb9 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 5 May 2016 16:22:26 -0700
Subject: [PATCH 681/797] proc: prevent accessing /proc/<PID>/environ until
 it's ready

commit 8148a73c9901a8794a50f950083c00ccf97d43b3 upstream.

If /proc/<PID>/environ gets read before the envp[] array is fully set up
in create_{aout,elf,elf_fdpic,flat}_tables(), we might end up trying to
read more bytes than are actually written, as env_start will already be
set but env_end will still be zero, making the range calculation
underflow, allowing to read beyond the end of what has been written.

Fix this as it is done for /proc/<PID>/cmdline by testing env_end for
zero.  It is, apparently, intentionally set last in create_*_tables().

This bug was found by the PaX size_overflow plugin that detected the
arithmetic underflow of 'this_len = env_end - (env_start + src)' when
env_end is still zero.

The expected consequence is that userland trying to access
/proc/<PID>/environ of a not yet fully set up process may get
inconsistent data as we're in the middle of copying in the environment
variables.

Fixes: https://forums.grsecurity.net/viewtopic.php?f=3&t=4363
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=116461
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Emese Revfy <re.emese@gmail.com>
Cc: Pax Team <pageexec@freemail.hu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b7de324bec11..e8bbf6cdb437 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -954,7 +954,8 @@ static ssize_t environ_read(struct file *file, char __user *buf,
 	int ret = 0;
 	struct mm_struct *mm = file->private_data;
 
-	if (!mm)
+	/* Ensure the process spawned far enough to have an environment. */
+	if (!mm || !mm->env_end)
 		return 0;
 
 	page = (char *)__get_free_page(GFP_TEMPORARY);

From 24b8a175a66946ccb4ca227df52f517e1d8f5ef6 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 5 May 2016 16:22:12 -0700
Subject: [PATCH 682/797] mm: update min_free_kbytes from khugepaged after core
 initialization

commit bc22af74f271ef76b2e6f72f3941f91f0da3f5f8 upstream.

Khugepaged attempts to raise min_free_kbytes if it's set too low.
However, on boot khugepaged sets min_free_kbytes first from
subsys_initcall(), and then the mm 'core' over-rides min_free_kbytes
after from init_per_zone_wmark_min(), via a module_init() call.

Khugepaged used to use a late_initcall() to set min_free_kbytes (such
that it occurred after the core initialization), however this was
removed when the initialization of min_free_kbytes was integrated into
the starting of the khugepaged thread.

The fix here is simply to invoke the core initialization using a
core_initcall() instead of module_init(), such that the previous
initialization ordering is restored.  I didn't restore the
late_initcall() since start_stop_khugepaged() already sets
min_free_kbytes via set_recommended_min_free_kbytes().

This was noticed when we had a number of page allocation failures when
moving a workload to a kernel with this new initialization ordering.  On
an 8GB system this restores min_free_kbytes back to 67584 from 11365
when CONFIG_TRANSPARENT_HUGEPAGE=y is set and either
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y or
CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y.

Fixes: 79553da293d3 ("thp: cleanup khugepaged startup")
Signed-off-by: Jason Baron <jbaron@akamai.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c69531afbd8f..6cf5cadeaef7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6193,7 +6193,7 @@ int __meminit init_per_zone_wmark_min(void)
 	setup_per_zone_inactive_ratio();
 	return 0;
 }
-module_init(init_per_zone_wmark_min)
+core_initcall(init_per_zone_wmark_min)
 
 /*
  * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so

From f9d46494887e1494c3a6b40434ab425f74b15148 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <a@unstable.cc>
Date: Sat, 12 Mar 2016 11:12:59 +0100
Subject: [PATCH 683/797] batman-adv: fix DAT candidate selection (must use
 vid)

commit 2871734e85e920503d49b3a8bc0afbe0773b6036 upstream.

Now that DAT is VLAN aware, it must use the VID when
computing the DHT address of the candidate nodes where
an entry is going to be stored/retrieved.

Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware")
Signed-off-by: Antonio Quartulli <a@unstable.cc>
[sven@narfation.org: fix conflicts with current version]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/distributed-arp-table.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a49c705fb86b..5f19133c5530 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -553,6 +553,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
  * be sent to
  * @bat_priv: the bat priv with all the soft interface information
  * @ip_dst: ipv4 to look up in the DHT
+ * @vid: VLAN identifier
  *
  * An originator O is selected if and only if its DHT_ID value is one of three
  * closest values (from the LEFT, with wrap around if needed) then the hash
@@ -561,7 +562,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
  * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM.
  */
 static struct batadv_dat_candidate *
-batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
+batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
+			     unsigned short vid)
 {
 	int select;
 	batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -577,7 +579,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
 		return NULL;
 
 	dat.ip = ip_dst;
-	dat.vid = 0;
+	dat.vid = vid;
 	ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
 						    BATADV_DAT_ADDR_MAX);
 
@@ -597,6 +599,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: payload to send
  * @ip: the DHT key
+ * @vid: VLAN identifier
  * @packet_subtype: unicast4addr packet subtype to use
  *
  * This function copies the skb with pskb_copy() and is sent as unicast packet
@@ -607,7 +610,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
  */
 static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
 				 struct sk_buff *skb, __be32 ip,
-				 int packet_subtype)
+				 unsigned short vid, int packet_subtype)
 {
 	int i;
 	bool ret = false;
@@ -616,7 +619,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
 	struct sk_buff *tmp_skb;
 	struct batadv_dat_candidate *cand;
 
-	cand = batadv_dat_select_candidates(bat_priv, ip);
+	cand = batadv_dat_select_candidates(bat_priv, ip, vid);
 	if (!cand)
 		goto out;
 
@@ -1004,7 +1007,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 		ret = true;
 	} else {
 		/* Send the request to the DHT */
-		ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
+		ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
 					   BATADV_P_DAT_DHT_GET);
 	}
 out:
@@ -1132,8 +1135,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
 	/* Send the ARP reply to the candidates for both the IP addresses that
 	 * the node obtained from the ARP reply
 	 */
-	batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
-	batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
 }
 
 /**

From c6865db3a49a8f80052489fc6e1848df56f12ade Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 26 Feb 2016 17:56:13 +0100
Subject: [PATCH 684/797] batman-adv: Check skb size before using encapsulated
 ETH+VLAN header

commit c78296665c3d81f040117432ab9e1cb125521b0c upstream.

The encapsulated ethernet and VLAN header may be outside the received
ethernet frame. Thus the skb buffer size has to be checked before it can be
parsed to find out if it encapsulates another batman-adv packet.

Fixes: 420193573f11 ("batman-adv: softif bridge loop avoidance")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/soft-interface.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ac4d08de5df4..720f1a5b81ac 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -407,11 +407,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	 */
 	nf_reset(skb);
 
+	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+		goto dropped;
+
 	vid = batadv_get_vid(skb, 0);
 	ethhdr = eth_hdr(skb);
 
 	switch (ntohs(ethhdr->h_proto)) {
 	case ETH_P_8021Q:
+		if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+			goto dropped;
+
 		vhdr = (struct vlan_ethhdr *)skb->data;
 
 		if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -423,8 +429,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	}
 
 	/* skb->dev & skb->pkt_type are set here */
-	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
-		goto dropped;
 	skb->protocol = eth_type_trans(skb, soft_iface);
 
 	/* should not be necessary anymore as we use skb_pull_rcsum()

From e426a835c1da093b2804825bfb4bfd503492e543 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Fri, 11 Mar 2016 14:04:49 +0100
Subject: [PATCH 685/797] batman-adv: Fix broadcast/ogm queue limit on a
 removed interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c4fdb6cff2aa0ae740c5f19b6f745cbbe786d42f upstream.

When removing a single interface while a broadcast or ogm packet is
still pending then we will free the forward packet without releasing the
queue slots again.

This patch is supposed to fix this issue.

Fixes: 6d5808d4ae1b ("batman-adv: Add missing hardif_free_ref in forw_packet_free")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
[sven@narfation.org: fix conflicts with current version]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/send.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index f664324805eb..0e0c3b8ed927 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -630,6 +630,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
+			if (!forw_packet->own)
+				atomic_inc(&bat_priv->bcast_queue_left);
+
 			batadv_forw_packet_free(forw_packet);
 		}
 	}
@@ -657,6 +660,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
+			if (!forw_packet->own)
+				atomic_inc(&bat_priv->batman_queue_left);
+
 			batadv_forw_packet_free(forw_packet);
 		}
 	}

From 639ddeaee4f4111d80699452de5b70db29e96054 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 20 Mar 2016 12:27:53 +0100
Subject: [PATCH 686/797] batman-adv: Reduce refcnt of removed router when
 updating route

commit d1a65f1741bfd9c69f9e4e2ad447a89b6810427d upstream.

_batadv_update_route rcu_dereferences orig_ifinfo->router outside of a
spinlock protected region to print some information messages to the debug
log. But this pointer is not checked again when the new pointer is assigned
in the spinlock protected region. Thus it can happen that the value of
orig_ifinfo->router changed in the meantime and thus the reference counter
of the wrong router gets reduced after the spinlock protected region.

Just rcu_dereferencing the value of orig_ifinfo->router inside the spinlock
protected region (which also set the new pointer) is enough to get the
correct old router object.

Fixes: e1a5382f978b ("batman-adv: Make orig_node->router an rcu protected pointer")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/routing.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 3207667e69de..d8a2f33e60e5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -104,6 +104,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
 		neigh_node = NULL;
 
 	spin_lock_bh(&orig_node->neigh_list_lock);
+	/* curr_router used earlier may not be the current orig_ifinfo->router
+	 * anymore because it was dereferenced outside of the neigh_list_lock
+	 * protected region. After the new best neighbor has replace the current
+	 * best neighbor the reference counter needs to decrease. Consequently,
+	 * the code needs to ensure the curr_router variable contains a pointer
+	 * to the replaced best neighbor.
+	 */
+	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
 	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 	batadv_orig_ifinfo_free_ref(orig_ifinfo);

From 4bc9468f1680e799e3036a6e816ed9ecfc7d98a3 Mon Sep 17 00:00:00 2001
From: Howard Cochran <hcochran@kernelspring.com>
Date: Thu, 10 Mar 2016 01:12:39 -0500
Subject: [PATCH 687/797] writeback: Fix performance regression in
 wb_over_bg_thresh()

commit 74d369443325063a5f0260e63971decb950fd8fa upstream.

Commit 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use
wb_domain aware operations") unintentionally changed this function's
meaning from "are there more dirty pages than the background writeback
threshold" to "are there more dirty pages than the writeback threshold".
The background writeback threshold is typically half of the writeback
threshold, so this had the effect of raising the number of dirty pages
required to cause a writeback worker to perform background writeout.

This can cause a very severe performance regression when a BDI uses
BDI_CAP_STRICTLIMIT because balance_dirty_pages() and the writeback worker
can now disagree on whether writeback should be initiated.

For example, in a system having 1GB of RAM, a single spinning disk, and a
"pass-through" FUSE filesystem mounted over the disk, application code
mmapped a 128MB file on the disk and was randomly dirtying pages in that
mapping.

Because FUSE uses strictlimit and has a default max_ratio of only 1%, in
balance_dirty_pages, thresh is ~200, bg_thresh is ~100, and the
dirty_freerun_ceiling is the average of those, ~150. So, it pauses the
dirtying processes when we have 151 dirty pages and wakes up a background
writeback worker. But the worker tests the wrong threshold (200 instead of
100), so it does not initiate writeback and just returns.

Thus, balance_dirty_pages keeps looping, sleeping and then waking up the
worker who will do nothing. It remains stuck in this state until the few
dirty pages that we have finally expire and we write them back for that
reason. Then the whole process repeats, resulting in near-zero throughput
through the FUSE BDI.

The fix is to call the parameterized variant of wb_calc_thresh, so that the
worker will do writeback if the bg_thresh is exceeded which was the
behavior before the referenced commit.

Fixes: 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use wb_domain aware operations")
Signed-off-by: Howard Cochran <hcochran@kernelspring.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page-writeback.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d15d88c8efa1..e40c9364582d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1899,7 +1899,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 	if (gdtc->dirty > gdtc->bg_thresh)
 		return true;
 
-	if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc))
+	if (wb_stat(wb, WB_RECLAIMABLE) >
+	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
 		return true;
 
 	if (mdtc) {
@@ -1913,7 +1914,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 		if (mdtc->dirty > mdtc->bg_thresh)
 			return true;
 
-		if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc))
+		if (wb_stat(wb, WB_RECLAIMABLE) >
+		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
 			return true;
 	}
 

From a7ebd7f5d87b33f36041239f3c2087a0572db4fb Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Tue, 3 May 2016 20:29:39 +0100
Subject: [PATCH 688/797] MAINTAINERS: Remove asterisk from EFI directory names

commit e8dfe6d8f6762d515fcd4f30577f7bfcf7659887 upstream.

Mark reported that having asterisks on the end of directory names
confuses get_maintainer.pl when it encounters subdirectories, and that
my name does not appear when run on drivers/firmware/efi/libstub.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1462303781-8686-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4c3e1d2ac31b..ab65bbecb159 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4097,8 +4097,8 @@ F:	Documentation/efi-stub.txt
 F:	arch/ia64/kernel/efi.c
 F:	arch/x86/boot/compressed/eboot.[ch]
 F:	arch/x86/include/asm/efi.h
-F:	arch/x86/platform/efi/*
-F:	drivers/firmware/efi/*
+F:	arch/x86/platform/efi/
+F:	drivers/firmware/efi/
 F:	include/linux/efi*.h
 
 EFI VARIABLE FILESYSTEM

From 73c1fd0aa105bdea4768f9a11c850574fb9091f9 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 6 May 2016 11:33:39 +0800
Subject: [PATCH 689/797] x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO

commit 886123fb3a8656699dff40afa0573df359abeb18 upstream.

Currently we read the tsc ratio: ratio = (MSR_PLATFORM_INFO >> 8) & 0x1f;

Thus we get bit 8-12 of MSR_PLATFORM_INFO, however according to the SDM
(35.5), the ratio bits are bit 8-15.

Ignoring the upper bits can result in an incorrect tsc ratio, which causes the
TSC calibration and the Local APIC timer frequency to be incorrect.

Fix this problem by masking 0xff instead.

[ tglx: Massaged changelog ]

Fixes: 7da7c1561366 "x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs"
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Bin Gao <bin.gao@intel.com>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/1462505619-5516-1-git-send-email-yu.c.chen@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/tsc_msr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 92ae6acac8a7..6aa0f4d9eea6 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void)
 
 	if (freq_desc_tables[cpu_index].msr_plat) {
 		rdmsr(MSR_PLATFORM_INFO, lo, hi);
-		ratio = (lo >> 8) & 0x1f;
+		ratio = (lo >> 8) & 0xff;
 	} else {
 		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 		ratio = (hi >> 8) & 0x1f;

From ddd5c3139de87400a6c6601ad3f54621e9d238fb Mon Sep 17 00:00:00 2001
From: Maxim Patlasov <mpatlasov@virtuozzo.com>
Date: Tue, 16 Feb 2016 11:45:33 -0800
Subject: [PATCH 690/797] fs/pnode.c: treat zero mnt_group_id-s as unequal

commit 7ae8fd0351f912b075149a1e03a017be8b903b9a upstream.

propagate_one(m) calculates "type" argument for copy_tree() like this:

>    if (m->mnt_group_id == last_dest->mnt_group_id) {
>        type = CL_MAKE_SHARED;
>    } else {
>        type = CL_SLAVE;
>        if (IS_MNT_SHARED(m))
>           type |= CL_MAKE_SHARED;
>   }

The "type" argument then governs clone_mnt() behavior with respect to flags
and mnt_master of new mount. When we iterate through a slave group, it is
possible that both current "m" and "last_dest" are not shared (although,
both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
above erroneously makes new mount shared and sets its mnt_master to
last_source->mnt_master. The patch fixes the problem by handling zero
mnt_group_id-s as though they are unequal.

The similar problem exists in the implementation of "else" clause above
when we have to ascend upward in the master/slave tree by calling:

>    last_source = last_source->mnt_master;
>    last_dest = last_source->mnt_parent;

proper number of times. The last step is governed by
"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
both are zero. The patch fixes this case in the same way as the former one.

[AV: don't open-code an obvious helper...]

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pnode.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..c524fdddc7fb 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
 static int propagate_one(struct mount *m)
 {
 	struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
 	/* skip if mountpoint isn't covered by it */
 	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
 		return 0;
-	if (m->mnt_group_id == last_dest->mnt_group_id) {
+	if (peers(m, last_dest)) {
 		type = CL_MAKE_SHARED;
 	} else {
 		struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}
-				if (n->mnt_group_id != last_dest->mnt_group_id) {
+				if (!peers(n, last_dest)) {
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}

From b17580a3cb901c56e9b9a3dea4d12153f5fc879e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 5 May 2016 09:29:29 -0500
Subject: [PATCH 691/797] propogate_mnt: Handle the first propogated copy being
 a slave

commit 5ec0811d30378ae104f250bfc9b3640242d81e3f upstream.

When the first propagated copy was a slave the following oops would result:
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
> IP: [<ffffffff811fba4e>] propagate_one+0xbe/0x1c0
> PGD bacd4067 PUD bac66067 PMD 0
> Oops: 0000 [#1] SMP
> Modules linked in:
> CPU: 1 PID: 824 Comm: mount Not tainted 4.6.0-rc5userns+ #1523
> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
> task: ffff8800bb0a8000 ti: ffff8800bac3c000 task.ti: ffff8800bac3c000
> RIP: 0010:[<ffffffff811fba4e>]  [<ffffffff811fba4e>] propagate_one+0xbe/0x1c0
> RSP: 0018:ffff8800bac3fd38  EFLAGS: 00010283
> RAX: 0000000000000000 RBX: ffff8800bb77ec00 RCX: 0000000000000010
> RDX: 0000000000000000 RSI: ffff8800bb58c000 RDI: ffff8800bb58c480
> RBP: ffff8800bac3fd48 R08: 0000000000000001 R09: 0000000000000000
> R10: 0000000000001ca1 R11: 0000000000001c9d R12: 0000000000000000
> R13: ffff8800ba713800 R14: ffff8800bac3fda0 R15: ffff8800bb77ec00
> FS:  00007f3c0cd9b7e0(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000010 CR3: 00000000bb79d000 CR4: 00000000000006e0
> Stack:
>  ffff8800bb77ec00 0000000000000000 ffff8800bac3fd88 ffffffff811fbf85
>  ffff8800bac3fd98 ffff8800bb77f080 ffff8800ba713800 ffff8800bb262b40
>  0000000000000000 0000000000000000 ffff8800bac3fdd8 ffffffff811f1da0
> Call Trace:
>  [<ffffffff811fbf85>] propagate_mnt+0x105/0x140
>  [<ffffffff811f1da0>] attach_recursive_mnt+0x120/0x1e0
>  [<ffffffff811f1ec3>] graft_tree+0x63/0x70
>  [<ffffffff811f1f6b>] do_add_mount+0x9b/0x100
>  [<ffffffff811f2c1a>] do_mount+0x2aa/0xdf0
>  [<ffffffff8117efbe>] ? strndup_user+0x4e/0x70
>  [<ffffffff811f3a45>] SyS_mount+0x75/0xc0
>  [<ffffffff8100242b>] do_syscall_64+0x4b/0xa0
>  [<ffffffff81988f3c>] entry_SYSCALL64_slow_path+0x25/0x25
> Code: 00 00 75 ec 48 89 0d 02 22 22 01 8b 89 10 01 00 00 48 89 05 fd 21 22 01 39 8e 10 01 00 00 0f 84 e0 00 00 00 48 8b 80 d8 00 00 00 <48> 8b 50 10 48 89 05 df 21 22 01 48 89 15 d0 21 22 01 8b 53 30
> RIP  [<ffffffff811fba4e>] propagate_one+0xbe/0x1c0
>  RSP <ffff8800bac3fd38>
> CR2: 0000000000000010
> ---[ end trace 2725ecd95164f217 ]---

This oops happens with the namespace_sem held and can be triggered by
non-root users.  An all around not pleasant experience.

To avoid this scenario when finding the appropriate source mount to
copy stop the walk up the mnt_master chain when the first source mount
is encountered.

Further rewrite the walk up the last_source mnt_master chain so that
it is clear what is going on.

The reason why the first source mount is special is that its
mnt_parent is not a mount in the dest_mnt propagation tree, and as
such termination conditions based upon the dest_mnt mount propagation
tree do not make sense.

To avoid other kinds of confusion last_dest is not changed when
computing last_source.  last_dest is only used once in propagate_one
and that is above the point of the code being modified, so changing
the global variable is meaningless and confusing.

fixes: f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 ("smarter propagate_mnt()")
Reported-by: Tycho Andersen <tycho.andersen@canonical.com>
Reviewed-by: Seth Forshee <seth.forshee@canonical.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pnode.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fs/pnode.c b/fs/pnode.c
index c524fdddc7fb..99899705b105 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -198,7 +198,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
 
 /* all accesses are serialized by namespace_sem */
 static struct user_namespace *user_ns;
-static struct mount *last_dest, *last_source, *dest_master;
+static struct mount *last_dest, *first_source, *last_source, *dest_master;
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
@@ -221,20 +221,22 @@ static int propagate_one(struct mount *m)
 		type = CL_MAKE_SHARED;
 	} else {
 		struct mount *n, *p;
+		bool done;
 		for (n = m; ; n = p) {
 			p = n->mnt_master;
-			if (p == dest_master || IS_MNT_MARKED(p)) {
-				while (last_dest->mnt_master != p) {
-					last_source = last_source->mnt_master;
-					last_dest = last_source->mnt_parent;
-				}
-				if (!peers(n, last_dest)) {
-					last_source = last_source->mnt_master;
-					last_dest = last_source->mnt_parent;
-				}
+			if (p == dest_master || IS_MNT_MARKED(p))
 				break;
-			}
 		}
+		do {
+			struct mount *parent = last_source->mnt_parent;
+			if (last_source == first_source)
+				break;
+			done = parent->mnt_master == p;
+			if (done && peers(n, parent))
+				break;
+			last_source = last_source->mnt_master;
+		} while (!done);
+
 		type = CL_SLAVE;
 		/* beginning of peer group among the slaves? */
 		if (IS_MNT_SHARED(m))
@@ -286,6 +288,7 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
 	 */
 	user_ns = current->nsproxy->mnt_ns->user_ns;
 	last_dest = dest_mnt;
+	first_source = source_mnt;
 	last_source = source_mnt;
 	mp = dest_mp;
 	list = tree_list;

From 303fa967e0a3cf1f9116ceb009d10e196f899142 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 26 Apr 2016 12:15:01 +0100
Subject: [PATCH 692/797] ARM: cpuidle: Pass on arm_cpuidle_suspend()'s return
 value

commit 625fe4f8ffc1b915248558481bb94249f6bd411c upstream.

arm_cpuidle_suspend() may return -EOPNOTSUPP, or any value returned
by the cpu_ops/cpuidle_ops suspend call. arm_enter_idle_state() doesn't
update 'ret' with this value, meaning we always signal success to
cpuidle_enter_state(), causing it to update the usage counters as if we
succeeded.

Fixes: 191de17aa3c1 ("ARM64: cpuidle: Replace cpu_suspend by the common ARM/ARM64 function")
Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpuidle/cpuidle-arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 545069d5fdfb..e342565e8715 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -50,7 +50,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,
 		 * call the CPU ops suspend protocol with idle index as a
 		 * parameter.
 		 */
-		arm_cpuidle_suspend(idx);
+		ret = arm_cpuidle_suspend(idx);
 
 		cpu_pm_exit();
 	}

From f0f21f80609c7e1da91e34face5b86547bd7401a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 May 2016 13:32:34 +0530
Subject: [PATCH 693/797] ARC: Add missing io barriers to
 io{read,write}{16,32}be()

commit e5bc0478ab6cf565619224536d75ecb2aedca43b upstream.

While reviewing a different change to asm-generic/io.h Arnd spotted that
ARC ioread32 and ioread32be both of which come from asm-generic versions
are not symmetrical in terms of calling the io barriers.

generic ioread32   -> ARC readl()                  [ has barriers]
generic ioread32be -> __be32_to_cpu(__raw_readl()) [ lacks barriers]

While generic ioread32be is being remediated to call readl(), that involves
a swab32(), causing double swaps on ioread32be() on Big Endian systems.

So provide our versions of big endian IO accessors to ensure io barrier
calls while also keeping them optimal

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/io.h | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 27b17adea50d..cb69299a492e 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -13,6 +13,15 @@
 #include <asm/byteorder.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_ISA_ARCV2
+#include <asm/barrier.h>
+#define __iormb()		rmb()
+#define __iowmb()		wmb()
+#else
+#define __iormb()		do { } while (0)
+#define __iowmb()		do { } while (0)
+#endif
+
 extern void __iomem *ioremap(unsigned long physaddr, unsigned long size);
 extern void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
 				  unsigned long flags);
@@ -22,6 +31,15 @@ extern void iounmap(const void __iomem *addr);
 #define ioremap_wc(phy, sz)		ioremap(phy, sz)
 #define ioremap_wt(phy, sz)		ioremap(phy, sz)
 
+/*
+ * io{read,write}{16,32}be() macros
+ */
+#define ioread16be(p)		({ u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
+#define ioread32be(p)		({ u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
+
+#define iowrite16be(v,p)	({ __iowmb(); __raw_writew((__force u16)cpu_to_be16(v), p); })
+#define iowrite32be(v,p)	({ __iowmb(); __raw_writel((__force u32)cpu_to_be32(v), p); })
+
 /* Change struct page to physical address */
 #define page_to_phys(page)		(page_to_pfn(page) << PAGE_SHIFT)
 
@@ -99,15 +117,6 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 
 }
 
-#ifdef CONFIG_ISA_ARCV2
-#include <asm/barrier.h>
-#define __iormb()		rmb()
-#define __iowmb()		wmb()
-#else
-#define __iormb()		do { } while (0)
-#define __iowmb()		do { } while (0)
-#endif
-
 /*
  * MMIO can also get buffered/optimized in micro-arch, so barriers needed
  * Based on ARM model for the typical use case

From ac8fc72dec814226cfcb96cbe3023b89cc386428 Mon Sep 17 00:00:00 2001
From: Wang YanQing <udknight@gmail.com>
Date: Thu, 5 May 2016 14:14:21 +0100
Subject: [PATCH 694/797] x86/sysfb_efi: Fix valid BAR address range check

commit c10fcb14c7afd6688c7b197a814358fecf244222 upstream.

The code for checking whether a BAR address range is valid will break
out of the loop when a start address of 0x0 is encountered.

This behaviour is wrong since by breaking out of the loop we may miss
the BAR that describes the EFI frame buffer in a later iteration.

Because of this bug I can't use video=efifb: boot parameter to get
efifb on my new ThinkPad E550 for my old linux system hard disk with
3.10 kernel. In 3.10, efifb is the only choice due to DRM/I915 not
supporting the GPU.

This patch also adds a trivial optimization to break out after we find
the frame buffer address range without testing later BARs.

Signed-off-by: Wang YanQing <udknight@gmail.com>
[ Rewrote changelog. ]
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Peter Jones <pjones@redhat.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1462454061-21561-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/sysfb_efi.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index b285d4e8c68e..5da924bbf0a0 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -106,14 +106,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id)
 					continue;
 				for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 					resource_size_t start, end;
+					unsigned long flags;
+
+					flags = pci_resource_flags(dev, i);
+					if (!(flags & IORESOURCE_MEM))
+						continue;
+
+					if (flags & IORESOURCE_UNSET)
+						continue;
+
+					if (pci_resource_len(dev, i) == 0)
+						continue;
 
 					start = pci_resource_start(dev, i);
-					if (start == 0)
-						break;
 					end = pci_resource_end(dev, i);
 					if (screen_info.lfb_base >= start &&
 					    screen_info.lfb_base < end) {
 						found_bar = 1;
+						break;
 					}
 				}
 			}

From ee3e27f14e40bc3c95a175af482d6bbf35ab78bc Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Wed, 4 May 2016 13:48:56 +0800
Subject: [PATCH 695/797] ACPICA: Dispatcher: Update thread ID for recursive
 method calls

commit 93d68841a23a5779cef6fb9aa0ef32e7c5bd00da upstream.

ACPICA commit 7a3bd2d962f221809f25ddb826c9e551b916eb25

Set the mutex owner thread ID.
Original patch from: Prarit Bhargava <prarit@redhat.com>

Link: https://bugzilla.kernel.org/show_bug.cgi?id=115121
Link: https://github.com/acpica/acpica/commit/7a3bd2d9
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Tested-by: Andy Lutomirski <luto@kernel.org> # On a Dell XPS 13 9350
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpica/dsmethod.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index bc32f3194afe..28c50c6b5f45 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -417,6 +417,9 @@ acpi_ds_begin_method_execution(struct acpi_namespace_node *method_node,
 				obj_desc->method.mutex->mutex.
 				    original_sync_level =
 				    obj_desc->method.mutex->mutex.sync_level;
+
+				obj_desc->method.mutex->mutex.thread_id =
+				    acpi_os_get_thread_id();
 			}
 		}
 

From beac678d0908ee0a14200e1412f98a89b765c0aa Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sat, 30 Apr 2016 08:29:27 +1000
Subject: [PATCH 696/797] powerpc: Fix bad inline asm constraint in
 create_zero_mask()

commit b4c112114aab9aff5ed4568ca5e662bb02cdfe74 upstream.

In create_zero_mask() we have:

	addi	%1,%2,-1
	andc	%1,%1,%2
	popcntd	%0,%1

using the "r" constraint for %2. r0 is a valid register in the "r" set,
but addi X,r0,X turns it into an li:

	li	r7,-1
	andc	r7,r7,r0
	popcntd	r4,r7

Fix this by using the "b" constraint, for which r0 is not a valid
register.

This was found with a kernel build using gcc trunk, narrowed down to
when -frename-registers was enabled at -O2. It is just luck however
that we aren't seeing this on older toolchains.

Thanks to Segher for working with me to find this issue.

Fixes: d0cebfa650a0 ("powerpc: word-at-a-time optimization for 64-bit Little Endian")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/word-at-a-time.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index e4396a7d0f7c..4afe66aa1400 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -82,7 +82,7 @@ static inline unsigned long create_zero_mask(unsigned long bits)
 	    "andc	%1,%1,%2\n\t"
 	    "popcntd	%0,%1"
 		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
-		: "r" (bits));
+		: "b" (bits));
 
 	return leading_zero_bits;
 }

From a7fa0a478a625039ef0852e5606d1248cba093e4 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 1 Apr 2016 08:52:56 +0100
Subject: [PATCH 697/797] libahci: save port map for forced port map

commit 2fd0f46cb1b82587c7ae4a616d69057fb9bd0af7 upstream.

In use cases where force_port_map is used, saved_port_map is never set,
so the PORTS_IMPL register is not programmed as part of the initial
config. This patch fixes this by setting it to port_map even in the case
where force_port_map is used, making it more in line with other parts of
the code.

Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libahci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 998c6a85ad89..9628fa131757 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -467,6 +467,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv)
 		dev_info(dev, "forcing port_map 0x%x -> 0x%x\n",
 			 port_map, hpriv->force_port_map);
 		port_map = hpriv->force_port_map;
+		hpriv->saved_port_map = port_map;
 	}
 
 	if (hpriv->mask_port_map) {

From 6e337a05df8adfc54540ca2a2b9d621836697796 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 1 Apr 2016 08:52:57 +0100
Subject: [PATCH 698/797] ata: ahci-platform: Add ports-implemented DT
 bindings.

commit 17dcc37e3e847bc0e67a5b1ec52471fcc6c18682 upstream.

On some SoCs the PORTS_IMPL register value is never programmed by the
firmware and is left at zero, which means that no SATA ports are
available for software. The AHCI driver used to cope with this by
fabricating the port_map if the PORTS_IMPL register read as zero,
but a recent patch broke this workaround as a zero value is valid for
NVMe disks.

This patch adds ports-implemented DT bindings as a workaround for this issue
in a way that DT can override the PORTS_IMPL register in cases where
the firmware did not program it already.

Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/ata/ahci-platform.txt | 4 ++++
 drivers/ata/ahci_platform.c                             | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index c2340eeeb97f..c000832a7fb9 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -30,6 +30,10 @@ Optional properties:
 - target-supply     : regulator for SATA target power
 - phys              : reference to the SATA PHY node
 - phy-names         : must be "sata-phy"
+- ports-implemented : Mask that indicates which ports that the HBA supports
+		      are available for software to use. Useful if PORTS_IMPL
+		      is not programmed by the BIOS, which is true with
+		      some embedded SOC's.
 
 Required properties when using sub-nodes:
 - #address-cells    : number of cells to encode an address
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 04975b851c23..639adb1f8abd 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -51,6 +51,9 @@ static int ahci_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
+	of_property_read_u32(dev->of_node,
+			     "ports-implemented", &hpriv->force_port_map);
+
 	if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci"))
 		hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ;
 

From c8f8a515ae418498e73fff210d0b0c23e2193e6a Mon Sep 17 00:00:00 2001
From: Mike Manning <michael@bsch.com.au>
Date: Mon, 18 Apr 2016 12:13:23 +0000
Subject: [PATCH 699/797] USB: serial: cp210x: add ID for Link ECU

commit 1d377f4d690637a0121eac8701f84a0aa1e69a69 upstream.

The Link ECU is an aftermarket ECU computer for vehicles that provides
full tuning abilities as well as datalogging and displaying capabilities
via the USB to Serial adapter built into the device.

Signed-off-by: Mike Manning <michael@bsch.com.au>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index bdc0f2f24f19..7f45d00bf2ff 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -140,6 +140,8 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
 	{ USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
 	{ USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
+	{ USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */
+	{ USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */
 	{ USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
 	{ USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
 	{ USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */

From e5dd50f5729d6c94a0732fdfacac6ad7a1c0eb64 Mon Sep 17 00:00:00 2001
From: Jasem Mutlaq <mutlaqja@ikarustech.com>
Date: Tue, 19 Apr 2016 10:38:27 +0300
Subject: [PATCH 700/797] USB: serial: cp210x: add Straizona Focusers device
 ids

commit 613ac23a46e10d4d4339febdd534fafadd68e059 upstream.

Adding VID:PID for Starizona Focusers to cp210x driver.

Signed-off-by: Jasem Mutlaq <mutlaqja@ikarustech.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 7f45d00bf2ff..a2b43a6e7fa7 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -108,6 +108,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */
 	{ USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */
 	{ USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */
 	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. */
@@ -117,6 +118,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
+	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
 	{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */

From 6e9544fb236325423d5066ebdeed577fb92be315 Mon Sep 17 00:00:00 2001
From: Stanislav Meduna <stano@meduna.org>
Date: Mon, 2 May 2016 16:05:11 +0100
Subject: [PATCH 701/797] nvmem: mxs-ocotp: fix buffer overflow in read

commit d1306eb675ad7a9a760b6b8e8e189824b8db89e7 upstream.

This patch fixes the issue where the mxs_ocotp_read is reading
the ocotp in reg_size steps but decrements the remaining size
by 1. The number of iterations is thus four times higher,
overwriting the area behind the output buffer.

Fixes: c01e9a11ab6f ("nvmem: add driver for ocotp in i.MX23 and i.MX28")
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Stanislav Meduna <stano@meduna.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/mxs-ocotp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c
index 8ba19bba3156..2bb3c5799ac4 100644
--- a/drivers/nvmem/mxs-ocotp.c
+++ b/drivers/nvmem/mxs-ocotp.c
@@ -94,7 +94,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size,
 	if (ret)
 		goto close_banks;
 
-	while (val_size) {
+	while (val_size >= reg_size) {
 		if ((offset < OCOTP_DATA_OFFSET) || (offset % 16)) {
 			/* fill up non-data register */
 			*buf = 0;
@@ -103,7 +103,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size,
 		}
 
 		buf++;
-		val_size--;
+		val_size -= reg_size;
 		offset += reg_size;
 	}
 

From c04e6e9730e5613ae2d5bd75ead2493eee0dabde Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 27 Apr 2016 10:17:51 +0200
Subject: [PATCH 702/797] gpu: ipu-v3: Fix imx-ipuv3-crtc module autoloading

commit 503fe87bd0a8346ba9d8b7f49115dcd0a4185226 upstream.

If of_node is set before calling platform_device_add, the driver core
will try to use of: modalias matching, which fails because the device
tree nodes don't have a compatible property set. This patch fixes
imx-ipuv3-crtc module autoloading by setting the of_node property only
after the platform modalias is set.

Fixes: 304e6be652e2 ("gpu: ipu-v3: Assign of_node of child platform devices to corresponding ports")
Reported-by: Dennis Gilmore <dennis@ausil.us>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Tested-By: Dennis Gilmore <dennis@ausil.us>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/ipu-v3/ipu-common.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index a0e28f3a278d..0585fd2031dd 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1068,7 +1068,6 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 			goto err_register;
 		}
 
-		pdev->dev.of_node = of_node;
 		pdev->dev.parent = dev;
 
 		ret = platform_device_add_data(pdev, &reg->pdata,
@@ -1079,6 +1078,12 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 			platform_device_put(pdev);
 			goto err_register;
 		}
+
+		/*
+		 * Set of_node only after calling platform_device_add. Otherwise
+		 * the platform:imx-ipuv3-crtc modalias won't be used.
+		 */
+		pdev->dev.of_node = of_node;
 	}
 
 	return 0;

From 3d2ef4c1a725f185db7c25d186567f207813e74d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 2 May 2016 18:54:39 -0400
Subject: [PATCH 703/797] drm/amdgpu: make sure vertical front porch is at
 least 1

commit 0126d4b9a516256f2432ca0dc78ab293a8255378 upstream.

hw doesn't like a 0 value.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 1e0bba29e167..1cd6de575305 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -298,6 +298,10 @@ bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
 	    && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
 		adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
 
+	/* vertical FP must be at least 1 */
+	if (mode->crtc_vsync_start == mode->crtc_vdisplay)
+		adjusted_mode->crtc_vsync_start++;
+
 	/* get the native mode for scaling */
 	if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT))
 		amdgpu_panel_mode_fixup(encoder, adjusted_mode);

From a71718ded5b74876097822f31416e6210795879a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 3 May 2016 12:44:29 +1000
Subject: [PATCH 704/797] drm/amdgpu: set metadata pointer to NULL after
 freeing.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 0092d3edcb23fcdb8cbe4159ba94a534290ff982 upstream.

Without this there was a double free of the metadata,
which ended up freeing the fd table for me here, and taking
out the machine more often than not.

I reproduced with X.org + modesetting DDX + latest llvm/mesa,
also required using dri3.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b8fbbd7699e4..73628c7599e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -540,6 +540,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 	if (!metadata_size) {
 		if (bo->metadata_size) {
 			kfree(bo->metadata);
+			bo->metadata = NULL;
 			bo->metadata_size = 0;
 		}
 		return 0;

From d3cd04a8a94ab3fc02eef4f861aac0f494b2366e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 4 Apr 2016 14:54:59 +0900
Subject: [PATCH 705/797] iio: ak8975: Fix NULL pointer exception on early
 interrupt

commit 07d2390e36ee5b3265e9cc8305f2a106c8721e16 upstream.

In certain probe conditions the interrupt came right after registering
the handler causing a NULL pointer exception because of uninitialized
waitqueue:

$ udevadm trigger
i2c-gpio i2c-gpio-1: using pins 143 (SDA) and 144 (SCL)
i2c-gpio i2c-gpio-3: using pins 53 (SDA) and 52 (SCL)
Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = e8b38000
[00000000] *pgd=00000000
Internal error: Oops: 5 [#1] SMP ARM
Modules linked in: snd_soc_i2s(+) i2c_gpio(+) snd_soc_idma snd_soc_s3c_dma snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer snd soundcore ac97_bus spi_s3c64xx pwm_samsung dwc2 exynos_adc phy_exynos_usb2 exynosdrm exynos_rng rng_core rtc_s3c
CPU: 0 PID: 717 Comm: data-provider-m Not tainted 4.6.0-rc1-next-20160401-00011-g1b8d87473b9e-dirty #101
Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
(...)
(__wake_up_common) from [<c0379624>] (__wake_up+0x38/0x4c)
(__wake_up) from [<c0a41d30>] (ak8975_irq_handler+0x28/0x30)
(ak8975_irq_handler) from [<c0386720>] (handle_irq_event_percpu+0x88/0x140)
(handle_irq_event_percpu) from [<c038681c>] (handle_irq_event+0x44/0x68)
(handle_irq_event) from [<c0389c40>] (handle_edge_irq+0xf0/0x19c)
(handle_edge_irq) from [<c0385e04>] (generic_handle_irq+0x24/0x34)
(generic_handle_irq) from [<c05ee360>] (exynos_eint_gpio_irq+0x50/0x68)
(exynos_eint_gpio_irq) from [<c0386720>] (handle_irq_event_percpu+0x88/0x140)
(handle_irq_event_percpu) from [<c038681c>] (handle_irq_event+0x44/0x68)
(handle_irq_event) from [<c0389a70>] (handle_fasteoi_irq+0xb4/0x194)
(handle_fasteoi_irq) from [<c0385e04>] (generic_handle_irq+0x24/0x34)
(generic_handle_irq) from [<c03860b4>] (__handle_domain_irq+0x5c/0xb4)
(__handle_domain_irq) from [<c0301774>] (gic_handle_irq+0x54/0x94)
(gic_handle_irq) from [<c030c910>] (__irq_usr+0x50/0x80)

The bug was reproduced on exynos4412-trats2 (with a max77693 device also
using i2c-gpio) after building max77693 as a module.

Fixes: 94a6d5cf7caa ("iio:ak8975 Implement data ready interrupt handling")
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Tested-by: Gregor Boirie <gregor.boirie@parrot.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/magnetometer/ak8975.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index b13936dacc78..fd780bbcd07e 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -462,6 +462,8 @@ static int ak8975_setup_irq(struct ak8975_data *data)
 	int rc;
 	int irq;
 
+	init_waitqueue_head(&data->data_ready_queue);
+	clear_bit(0, &data->flags);
 	if (client->irq)
 		irq = client->irq;
 	else
@@ -477,8 +479,6 @@ static int ak8975_setup_irq(struct ak8975_data *data)
 		return rc;
 	}
 
-	init_waitqueue_head(&data->data_ready_queue);
-	clear_bit(0, &data->flags);
 	data->eoc_irq = irq;
 
 	return rc;

From 0f5c3afc750715fb644d9b234a7b05afb11dfe54 Mon Sep 17 00:00:00 2001
From: Richard Leitner <dev@g0hl1n.net>
Date: Tue, 5 Apr 2016 15:03:48 +0200
Subject: [PATCH 706/797] iio: ak8975: fix maybe-uninitialized warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 05be8d4101d960bad271d32b4f6096af1ccb1534 upstream.

If i2c_device_id *id is NULL and acpi_match_device returns NULL too,
then chipset may be uninitialized when accessing &ak_def_array[chipset] in
ak8975_probe. Therefore initialize chipset to AK_MAX_TYPE, which will
return an error when not changed.

This patch fixes the following maybe-uninitialized warning:

drivers/iio/magnetometer/ak8975.c: In function ‘ak8975_probe’:
drivers/iio/magnetometer/ak8975.c:788:14: warning: ‘chipset’ may be used
uninitialized in this function [-Wmaybe-uninitialized]
  data->def = &ak_def_array[chipset];

Signed-off-by: Richard Leitner <dev@g0hl1n.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/magnetometer/ak8975.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index fd780bbcd07e..f2a7f72f7aa6 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -732,7 +732,7 @@ static int ak8975_probe(struct i2c_client *client,
 	int eoc_gpio;
 	int err;
 	const char *name = NULL;
-	enum asahi_compass_chipset chipset;
+	enum asahi_compass_chipset chipset = AK_MAX_TYPE;
 
 	/* Grab and set up the supplied GPIO. */
 	if (client->dev.platform_data)

From 6b5f7a680d9804f0f441229ce1278efe6f22f8a5 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 2 May 2016 18:53:27 -0400
Subject: [PATCH 707/797] drm/radeon: make sure vertical front porch is at
 least 1

commit 3104b8128d4d646a574ed9d5b17c7d10752cd70b upstream.

hw doesn't like a 0 value.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/atombios_encoders.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index adf74f4366bb..0b04b9282f56 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -310,6 +310,10 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
 	    && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
 		adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
 
+	/* vertical FP must be at least 1 */
+	if (mode->crtc_vsync_start == mode->crtc_vdisplay)
+		adjusted_mode->crtc_vsync_start++;
+
 	/* get the native mode for scaling */
 	if (radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) {
 		radeon_panel_mode_fixup(encoder, adjusted_mode);

From cf26f675dbd9369a2f28555a6d241208cdc71c6e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Mon, 18 Apr 2016 10:04:21 +0300
Subject: [PATCH 708/797] drm/i915/ddi: Fix eDP VDD handling during booting and
 suspend/resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5eaa60c7109b40f17ac81090bc8b90482da76cd1 upstream.

The driver's VDD on/off logic assumes that whenever the VDD is on we
also hold an AUX power domain reference. Since BIOS can leave the VDD on
during booting and resuming and on DDI platforms we won't take a
corresponding power reference, the above assumption won't hold on those
platforms and an eventual delayed VDD off work will do an extraneous AUX
power domain put resulting in a refcount underflow. Fix this the same
way we did this for non-DDI DP encoders:

commit 6d93c0c41760c0 ("drm/i915: fix VDD state tracking after system
resume")

At the same time call the DP encoder suspend handler the same way as the
non-DDI DP encoders do to flush any pending VDD off work. Leaving the
work running may cause a HW access where we don't expect this (at a point
where power domains are suspended already).

While at it remove an unnecessary function call indirection.

This fixed for me AUX refcount underflow problems on BXT during
suspend/resume.

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1460963062-13211-4-git-send-email-imre.deak@intel.com
(cherry picked from commit bf93ba67e9c05882f05b7ca2d773cfc8bf462c2a)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ddi.c | 10 +++-------
 drivers/gpu/drm/i915/intel_dp.c  |  4 ++--
 drivers/gpu/drm/i915/intel_drv.h |  2 ++
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 7e6158b889da..241252de7186 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3188,12 +3188,6 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 	intel_ddi_clock_get(encoder, pipe_config);
 }
 
-static void intel_ddi_destroy(struct drm_encoder *encoder)
-{
-	/* HDMI has nothing special to destroy, so we can go with this. */
-	intel_dp_encoder_destroy(encoder);
-}
-
 static bool intel_ddi_compute_config(struct intel_encoder *encoder,
 				     struct intel_crtc_state *pipe_config)
 {
@@ -3212,7 +3206,8 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder,
 }
 
 static const struct drm_encoder_funcs intel_ddi_funcs = {
-	.destroy = intel_ddi_destroy,
+	.reset = intel_dp_encoder_reset,
+	.destroy = intel_dp_encoder_destroy,
 };
 
 static struct intel_connector *
@@ -3284,6 +3279,7 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 	intel_encoder->post_disable = intel_ddi_post_disable;
 	intel_encoder->get_hw_state = intel_ddi_get_hw_state;
 	intel_encoder->get_config = intel_ddi_get_config;
+	intel_encoder->suspend = intel_dp_encoder_suspend;
 
 	intel_dig_port->port = port;
 	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 78b8ec84d576..e55a82a99e7f 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5035,7 +5035,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	kfree(intel_dig_port);
 }
 
-static void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
+void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
 
@@ -5077,7 +5077,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp)
 	edp_panel_vdd_schedule_off(intel_dp);
 }
 
-static void intel_dp_encoder_reset(struct drm_encoder *encoder)
+void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
 	struct intel_dp *intel_dp;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0d00f07b7163..f34a219ec5c4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1204,6 +1204,8 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp,
 void intel_dp_start_link_train(struct intel_dp *intel_dp);
 void intel_dp_stop_link_train(struct intel_dp *intel_dp);
 void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
+void intel_dp_encoder_reset(struct drm_encoder *encoder);
+void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
 void intel_dp_encoder_destroy(struct drm_encoder *encoder);
 int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc);
 bool intel_dp_compute_config(struct intel_encoder *encoder,

From fa26a3c6c25bceed402055e06c7e0a2e4e13ebe5 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Wed, 20 Apr 2016 15:39:02 +0300
Subject: [PATCH 709/797] drm/i915: Fix eDP low vswing for Broadwell
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 992e7a41f9fcc7bcd10e7d346aee5ed7a2c241cb upstream.

It was noticed on bug #94087 that module parameter
i915.edp_vswing=2 that should override the VBT setting
to use default voltage swing (400 mV) was not applied
for Broadwell.

This patch provides a fix for this by checking if default
i.e. higher voltage swing is requested to be used and
applies the DDI translations table for DP instead of eDP
(low vswing) table.

v2: Combine two if statements into one (Jani)
v3: Change dev_priv->edp_low_vswing to use dev_priv->vbt.edp.low_vswing

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94087
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461155942-7749-1-git-send-email-mika.kahola@intel.com
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit 00983519214b61c1b9371ec2ed55a4dde773e384)
[Jani: s/dev_priv->vbt.edp.low_vswing/dev_priv->edp_low_vswing/ to backport]
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ddi.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 241252de7186..3c6b07683bd9 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -464,9 +464,17 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
 	} else if (IS_BROADWELL(dev)) {
 		ddi_translations_fdi = bdw_ddi_translations_fdi;
 		ddi_translations_dp = bdw_ddi_translations_dp;
-		ddi_translations_edp = bdw_ddi_translations_edp;
+
+		if (dev_priv->edp_low_vswing) {
+			ddi_translations_edp = bdw_ddi_translations_edp;
+			n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
+		} else {
+			ddi_translations_edp = bdw_ddi_translations_dp;
+			n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
+		}
+
 		ddi_translations_hdmi = bdw_ddi_translations_hdmi;
-		n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
+
 		n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
 		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
 		hdmi_default_entry = 7;

From bc631165a1b6583b3e96404fec4ddc8efb2f4392 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 20 Apr 2016 16:43:56 +0300
Subject: [PATCH 710/797] drm/i915: Make RPS EI/thresholds multiple of 25 on
 SNB-BDW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 4ea3959018d09edfa36a9e7b5ccdbd4ec4b99e49 upstream.

Somehow my SNB GT1 (Dell XPS 8300) gets very unhappy around
GPU hangs if the RPS EI/thresholds aren't suitably aligned.
It seems like scheduling/timer interrupts stop working somehow
and things get stuck eg. in usleep_range().

I bisected the problem down to
commit 8a5864377b12 ("drm/i915/skl: Restructured the gen6_set_rps_thresholds function")
I observed that before all the values were at least multiples of 25,
but afterwards they are not. And rounding things up to the next multiple
of 25 does seem to help, so let's do that. I also tried roundup(..., 5)
but that wasn't sufficient. Also I have no idea if we might need this sort of
thing on gen9+ as well.

These are the original EI/thresholds:
 LOW_POWER
  GEN6_RP_UP_EI          12500
  GEN6_RP_UP_THRESHOLD   11800
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 21250
 BETWEEN
  GEN6_RP_UP_EI          10250
  GEN6_RP_UP_THRESHOLD    9225
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 18750
 HIGH_POWER
  GEN6_RP_UP_EI           8000
  GEN6_RP_UP_THRESHOLD    6800
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 15000

These are after 8a5864377b12:
 LOW_POWER
  GEN6_RP_UP_EI          12500
  GEN6_RP_UP_THRESHOLD   11875
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 21250
 BETWEEN
  GEN6_RP_UP_EI          10156
  GEN6_RP_UP_THRESHOLD    9140
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 18750
 HIGH_POWER
  GEN6_RP_UP_EI           7812
  GEN6_RP_UP_THRESHOLD    6640
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 15000

And these are what we have after this patch:
 LOW_POWER
  GEN6_RP_UP_EI          12500
  GEN6_RP_UP_THRESHOLD   11875
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 21250
 BETWEEN
  GEN6_RP_UP_EI          10175
  GEN6_RP_UP_THRESHOLD    9150
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 18750
 HIGH_POWER
  GEN6_RP_UP_EI           7825
  GEN6_RP_UP_THRESHOLD    6650
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 15000

Cc: Akash Goel <akash.goel@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Testcase: igt/kms_pipe_crc_basic/hang-read-crc-pipe-B
Fixes: 8a5864377b12 ("drm/i915/skl: Restructured the gen6_set_rps_thresholds function")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461159836-9108-1-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Patrik Jakobsson <patrik.jakobsson@linux.intel.com>
(cherry picked from commit 8a292d016d1cc4938ff14b4df25328230b08a408)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_reg.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bc7b8faba84d..7e461dca564c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2838,7 +2838,14 @@ enum skl_disp_power_wells {
 #define GEN6_RP_STATE_CAP	(MCHBAR_MIRROR_BASE_SNB + 0x5998)
 #define BXT_RP_STATE_CAP        0x138170
 
-#define INTERVAL_1_28_US(us)	(((us) * 100) >> 7)
+/*
+ * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
+ * 8300) freezing up around GPU hangs. Looks as if even
+ * scheduling/timer interrupts start misbehaving if the RPS
+ * EI/thresholds are "bad", leading to a very sluggish or even
+ * frozen machine.
+ */
+#define INTERVAL_1_28_US(us)	roundup(((us) * 100) >> 7, 25)
 #define INTERVAL_1_33_US(us)	(((us) * 3)   >> 2)
 #define INTERVAL_0_833_US(us)	(((us) * 6) / 5)
 #define GT_INTERVAL_FROM_US(dev_priv, us) (IS_GEN9(dev_priv) ? \

From 8e1001c5638e244ab9a2ddddf5466b05ddf6af77 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@intel.com>
Date: Thu, 21 Apr 2016 16:48:32 +0530
Subject: [PATCH 711/797] drm/i915: Fake HDMI live status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 60b3143c7cac7e8d2ca65c0b347466c5776395d1 upstream.

This patch does the following:
- Fakes live status of HDMI as connected (even if it is not).
  While testing certain (monitor + cable) combinations with
  various Intel platforms, it seems that the live status register
  doesn't work reliably on some older devices. So limit the
  live_status check for HDMI detection, only for platforms
  from gen7 onwards.

V2: restrict faking live_status to certain platforms
V3: (Ville)
   - keep the debug message for !live_status case
   - fix indentation of comment
   - remove "warning" from the debug message

    (Jani)
   - Change format of fix details in the commit message

Fixes: 237ed86c693d ("drm/i915: Check live status before reading edid")
Suggested-by: Ville Syrjala <ville.syrjala@linux.intel.com>
Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461237606-16491-1-git-send-email-shashank.sharma@intel.com
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit 4f4a8185011773f7520d9916c6857db946e7f9d1)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_hdmi.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index e6c035b0fc1c..4b8ed9f2dabc 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1388,8 +1388,16 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 				hdmi_to_dig_port(intel_hdmi));
 	}
 
-	if (!live_status)
-		DRM_DEBUG_KMS("Live status not up!");
+	if (!live_status) {
+		DRM_DEBUG_KMS("HDMI live status down\n");
+		/*
+		 * Live status register is not reliable on all intel platforms.
+		 * So consider live_status only for certain platforms, for
+		 * others, read EDID to determine presence of sink.
+		 */
+		if (INTEL_INFO(dev_priv)->gen < 7 || IS_IVYBRIDGE(dev_priv))
+			live_status = true;
+	}
 
 	intel_hdmi_unset_edid(connector);
 

From dfa11d586248a21ce2c7fae02c02964c3a4a8379 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 23 Mar 2016 21:07:39 -0700
Subject: [PATCH 712/797] ACPI / processor: Request native thermal interrupt
 handling via _OSC

commit a21211672c9a1d730a39aa65d4a5b3414700adfb upstream.

There are several reports of freeze on enabling HWP (Hardware PStates)
feature on Skylake-based systems by the Intel P-states driver. The root
cause is identified as the HWP interrupts causing BIOS code to freeze.

HWP interrupts use the thermal LVT which can be handled by Linux
natively, but on the affected Skylake-based systems SMM will respond
to it by default.  This is a problem for several reasons:
 - On the affected systems the SMM thermal LVT handler is broken (it
   will crash when invoked) and a BIOS update is necessary to fix it.
 - With thermal interrupt handled in SMM we lose all of the reporting
   features of the arch/x86/kernel/cpu/mcheck/therm_throt driver.
 - Some thermal drivers like x86-package-temp depend on the thermal
   threshold interrupts signaled via the thermal LVT.
 - The HWP interrupts are useful for debugging and tuning
   performance (if the kernel can handle them).
The native handling of thermal interrupts needs to be enabled
because of that.

This requires some way to tell SMM that the OS can handle thermal
interrupts.  That can be done by using _OSC/_PDC in processor
scope very early during ACPI initialization.

The meaning of _OSC/_PDC bit 12 in processor scope is whether or
not the OS supports native handling of interrupts for Collaborative
Processor Performance Control (CPPC) notifications.  Since on
HWP-capable systems CPPC is a firmware interface to HWP, setting
this bit effectively tells the firmware that the OS will handle
thermal interrupts natively going forward.

For details on _OSC/_PDC refer to:
http://www.intel.com/content/www/us/en/standards/processor-vendor-specific-acpi-specification.html

To implement the _OSC/_PDC handshake as described, introduce a new
function, acpi_early_processor_osc(), that walks the ACPI
namespace looking for ACPI processor objects and invokes _OSC for
them with bit 12 in the capabilities buffer set and terminates the
namespace walk on the first success.

Also modify intel_thermal_interrupt() to clear HWP status bits in
the HWP_STATUS MSR to acknowledge HWP interrupts (which prevents
them from firing continuously).

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Subject & changelog, function rename ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c |  3 ++
 drivers/acpi/acpi_processor.c            | 52 ++++++++++++++++++++++++
 drivers/acpi/bus.c                       |  3 ++
 drivers/acpi/internal.h                  |  6 +++
 4 files changed, 64 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 2c5aaf8c2e2f..05538582a809 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -385,6 +385,9 @@ static void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
 
+	if (static_cpu_has(X86_FEATURE_HWP))
+		wrmsrl_safe(MSR_HWP_STATUS, 0);
+
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
 	/* Check for violation of core thermal thresholds*/
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 6979186dbd4b..9f77943653fb 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -491,6 +491,58 @@ static void acpi_processor_remove(struct acpi_device *device)
 }
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+#ifdef CONFIG_X86
+static bool acpi_hwp_native_thermal_lvt_set;
+static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
+							  u32 lvl,
+							  void *context,
+							  void **rv)
+{
+	u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
+	u32 capbuf[2];
+	struct acpi_osc_context osc_context = {
+		.uuid_str = sb_uuid_str,
+		.rev = 1,
+		.cap.length = 8,
+		.cap.pointer = capbuf,
+	};
+
+	if (acpi_hwp_native_thermal_lvt_set)
+		return AE_CTRL_TERMINATE;
+
+	capbuf[0] = 0x0000;
+	capbuf[1] = 0x1000; /* set bit 12 */
+
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) {
+		if (osc_context.ret.pointer && osc_context.ret.length > 1) {
+			u32 *capbuf_ret = osc_context.ret.pointer;
+
+			if (capbuf_ret[1] & 0x1000) {
+				acpi_handle_info(handle,
+					"_OSC native thermal LVT Acked\n");
+				acpi_hwp_native_thermal_lvt_set = true;
+			}
+		}
+		kfree(osc_context.ret.pointer);
+	}
+
+	return AE_OK;
+}
+
+void __init acpi_early_processor_osc(void)
+{
+	if (boot_cpu_has(X86_FEATURE_HWP)) {
+		acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+				    ACPI_UINT32_MAX,
+				    acpi_hwp_native_thermal_lvt_osc,
+				    NULL, NULL, NULL);
+		acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID,
+				 acpi_hwp_native_thermal_lvt_osc,
+				 NULL, NULL);
+	}
+}
+#endif
+
 /*
  * The following ACPI IDs are known to be suitable for representing as
  * processor devices.
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index a212cefae524..ca4f28432d87 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1004,6 +1004,9 @@ static int __init acpi_bus_init(void)
 		goto error1;
 	}
 
+	/* Set capability bits for _OSC under processor scope */
+	acpi_early_processor_osc();
+
 	/*
 	 * _OSC method may exist in module level code,
 	 * so it must be run after ACPI_FULL_INITIALIZATION
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 11d87bf67e73..0f3f41c13b38 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -130,6 +130,12 @@ void acpi_early_processor_set_pdc(void);
 static inline void acpi_early_processor_set_pdc(void) {}
 #endif
 
+#ifdef CONFIG_X86
+void acpi_early_processor_osc(void);
+#else
+static inline void acpi_early_processor_osc(void) {}
+#endif
+
 /* --------------------------------------------------------------------------
                                   Embedded Controller
    -------------------------------------------------------------------------- */

From f6ff7398220d7fda0f4d02b9c9755406d8169bc2 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 2 Feb 2016 16:57:18 -0800
Subject: [PATCH 713/797] lib/test-string_helpers.c: fix and improve
 string_get_size() tests

commit 72676bb53f33fd0ef3a1484fc1ecfd306dc6ff40 upstream.

Recently added commit 564b026fbd0d ("string_helpers: fix precision loss
for some inputs") fixed precision issues for string_get_size() and broke
tests.

Fix and improve them: test both STRING_UNITS_2 and STRING_UNITS_10 at a
time, better failure reporting, test small and huge values.

Fixes: 564b026fbd0d28e9 ("string_helpers: fix precision loss for some inputs")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: James Bottomley <JBottomley@Odin.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test-string_helpers.c | 67 ++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 18 deletions(-)

diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 98866a770770..25b5cbfb7615 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -327,36 +327,67 @@ static __init void test_string_escape(const char *name,
 }
 
 #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
 	do {                                                                   \
-		BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-		__test_string_get_size((size), (blk_size), (units),            \
-				       (exp_result));                          \
+		BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+		BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+		__test_string_get_size((size), (blk_size), (exp_result10),     \
+				       (exp_result2));                         \
 	} while (0)
 
 
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-					  const enum string_size_units units,
-					  const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+					      const char *exp,
+					      char *res,
+					      const u64 size,
+					      const u64 blk_size)
 {
-	char buf[string_get_size_maxbuf];
-
-	string_get_size(size, blk_size, units, buf, sizeof(buf));
-	if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+	if (!memcmp(res, exp, strlen(exp) + 1))
 		return;
 
-	buf[sizeof(buf) - 1] = '\0';
-	pr_warn("Test 'test_string_get_size_one' failed!\n");
-	pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+	res[string_get_size_maxbuf - 1] = '\0';
+
+	pr_warn("Test 'test_string_get_size' failed!\n");
+	pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
 		size, blk_size, units);
-	pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+	pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+					  const char *exp_result10,
+					  const char *exp_result2)
+{
+	char buf10[string_get_size_maxbuf];
+	char buf2[string_get_size_maxbuf];
+
+	string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+	string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+	test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+				   size, blk_size);
+
+	test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+				   size, blk_size);
 }
 
 static __init void test_string_get_size(void)
 {
-	test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-	test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-	test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+	/* small values */
+	test_string_get_size_one(0, 512, "0 B", "0 B");
+	test_string_get_size_one(1, 512, "512 B", "512 B");
+	test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+	/* normal values */
+	test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+	test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+	test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+	/* weird block sizes */
+	test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+	/* huge values */
+	test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+	test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
 static int __init test_string_helpers_init(void)

From 945b6ec05a475fc80bcb79ef006ee5c0263c7b3a Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Thu, 28 Jan 2016 15:19:23 -0800
Subject: [PATCH 714/797] drm/i915/skl: Fix DMC load on Skylake J0 and K0

commit a41c8882592fb80458959b10e37632ce030b68ca upstream.

The driver does not load firmware for unknown steppings, so these new
steppings must be added to the list.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1454023163-25469-1-git-send-email-mathew.j.martineau@linux.intel.com
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_csr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9e530a739354..fc28c512ece3 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -180,7 +180,8 @@ struct stepping_info {
 static const struct stepping_info skl_stepping_info[] = {
 		{'A', '0'}, {'B', '0'}, {'C', '0'},
 		{'D', '0'}, {'E', '0'}, {'F', '0'},
-		{'G', '0'}, {'H', '0'}, {'I', '0'}
+		{'G', '0'}, {'H', '0'}, {'I', '0'},
+		{'J', '0'}, {'K', '0'}
 };
 
 static struct stepping_info bxt_stepping_info[] = {

From 4c2795dd50f98fa162cb53190eb557be44f92f58 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 11 May 2016 11:23:26 +0200
Subject: [PATCH 715/797] Linux 4.4.10

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0722cdf52152..5b5f462f834c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 9
+SUBLEVEL = 10
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 32b06020f36dd2dcfd7832ffd34a84f254b14e46 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 15 Jan 2016 13:28:57 +0100
Subject: [PATCH 716/797] arm64: hide __efistub_ aliases from kallsyms

Commit e8f3010f7326 ("arm64/efi: isolate EFI stub from the kernel
proper") isolated the EFI stub code from the kernel proper by prefixing
all of its symbols with __efistub_, and selectively allowing access to
core kernel symbols from the stub by emitting __efistub_ aliases for
functions and variables that the stub can access legally.

As an unintended side effect, these aliases are emitted into the
kallsyms symbol table, which means they may turn up in backtraces,
e.g.,

  ...
  PC is at __efistub_memset+0x108/0x200
  LR is at fixup_init+0x3c/0x48
  ...
  [<ffffff8008328608>] __efistub_memset+0x108/0x200
  [<ffffff8008094dcc>] free_initmem+0x2c/0x40
  [<ffffff8008645198>] kernel_init+0x20/0xe0
  [<ffffff8008085cd0>] ret_from_fork+0x10/0x40

The backtrace in question has nothing to do with the EFI stub, but
simply returns one of the several aliases of memset() that have been
recorded in the kallsyms table. This is undesirable, since it may
suggest to people who are not aware of this that the issue they are
seeing is somehow EFI related.

So hide the __efistub_ aliases from kallsyms, by emitting them as
absolute linker symbols explicitly. The distinction between those
and section relative symbols is completely irrelevant to these
definitions, and to the final link we are performing when these
definitions are being taken into account (the distinction is only
relevant to symbols defined inside a section definition when performing
a partial link), and so the resulting values are identical to the
original ones. Since absolute symbols are ignored by kallsyms, this
will result in these values being omitted from its symbol table.

After this patch, the backtrace generated from the same address looks
like this:
  ...
  PC is at __memset+0x108/0x200
  LR is at fixup_init+0x3c/0x48
  ...
  [<ffffff8008328608>] __memset+0x108/0x200
  [<ffffff8008094dcc>] free_initmem+0x2c/0x40
  [<ffffff8008645198>] kernel_init+0x20/0xe0
  [<ffffff8008085cd0>] ret_from_fork+0x10/0x40

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 75feee3d9d51775072d3a04f47d4a439a4c4590e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/image.h | 40 ++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8b1599..999633bd7294 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -64,6 +64,16 @@
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)	ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
@@ -73,25 +83,25 @@
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp		= __pi_memcmp;
-__efistub_memchr		= __pi_memchr;
-__efistub_memcpy		= __pi_memcpy;
-__efistub_memmove		= __pi_memmove;
-__efistub_memset		= __pi_memset;
-__efistub_strlen		= __pi_strlen;
-__efistub_strcmp		= __pi_strcmp;
-__efistub_strncmp		= __pi_strncmp;
-__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+__efistub_memcmp		= KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr		= KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset		= KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen		= KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp		= KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp		= KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area	= KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy		= __pi_memcpy;
-__efistub___memmove		= __pi_memmove;
-__efistub___memset		= __pi_memset;
+__efistub___memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset		= KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text			= _text;
-__efistub__end			= _end;
-__efistub__edata		= _edata;
+__efistub__text			= KALLSYMS_HIDE(_text);
+__efistub__end			= KALLSYMS_HIDE(_end);
+__efistub__edata		= KALLSYMS_HIDE(_edata);
 
 #endif
 

From b87cf8adbe0d3b1998a7dafd01b70e9b118f641d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Fri, 15 Jan 2016 16:55:37 -0800
Subject: [PATCH 717/797] arch/arm64/include/asm/pgtable.h: add pmd_mkclean for
 THP

MADV_FREE needs pmd_dirty and pmd_mkclean for detecting recent overwrite
of the contents since MADV_FREE syscall is called for THP page.

This patch adds pmd_mkclean for THP page MADV_FREE support.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Shaohua Li <shli@kernel.org>
Cc: <yalin.wang2010@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Gang <gang.chen.5i5j@gmail.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Helge Deller <deller@gmx.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jason Evans <je@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mika Penttil <mika.penttila@nextfour.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Shaohua Li <shli@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 05ee26d9e7e29ab026995eab79be3c6e8351908c)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 76ff5d93c6c3..2daf88970731 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -369,6 +369,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
 #define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkclean(pmd)       pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))

From 035fdc46d48ae8a7cbf7199c74bac1de36cca626 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 14:50:21 +0100
Subject: [PATCH 718/797] arm64: kasan: ensure that the KASAN zero page is
 mapped read-only

When switching from the early KASAN shadow region, which maps the
entire shadow space read-write, to the permanent KASAN shadow region,
which uses a zero page to shadow regions that are not subject to
instrumentation, the lowest level table kasan_zero_pte[] may be
reused unmodified, which means that the mappings of the zero page
that it contains will still be read-write.

So update it explicitly to map the zero page read only when we
activate the permanent mapping.

Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 7b1af9795773d745c2a8c7d4ca5f2936e8b6adfb)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7d9fa9..cab7a5be40aa 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
+	int i;
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 
+	/*
+	 * KAsan may reuse the contents of kasan_zero_pte directly, so we
+	 * should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_zero_pte[i],
+			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	cpu_set_ttbr1(__pa(swapper_pg_dir));
 	flush_tlb_all();

From 8c226342ae45c7a2029af59cf81edc9147a60d56 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sun, 24 Jan 2016 15:24:12 +0900
Subject: [PATCH 719/797] arm64: Fix an enum typo in mm/dump.c

This patch fixes a typo in mm/dump.c:
"MODUELS_END_NR" should be "MODULES_END_NR".

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit b3122023df935cf14bf951da98ca598d71b9f826)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a119a74c..0adbebbc2803 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@ enum address_markers_idx {
 	PCI_START_NR,
 	PCI_END_NR,
 	MODULES_START_NR,
-	MODUELS_END_NR,
+	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 

From d2779548f7c7156686d4d88ce4ae904460952a8c Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Thu, 21 Jan 2016 22:56:26 -0500
Subject: [PATCH 720/797] Eliminate the .eh_frame sections from the aarch64
 vmlinux and kernel modules

By default the aarch64 gcc generates .eh_frame sections.  Unlike
.debug_frame sections, the .eh_frame sections are loaded into memory
when the associated code is loaded.  On an example kernel being built
with this default the .eh_frame section in vmlinux used an extra 1.7MB
of memory.  The x86 disables the creation of the .eh_frame section.
The aarch64 should probably do the same to save some memory.

Signed-off-by: William Cohen <wcohen@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 728dabd6d1751cf5e0f8e0535891393da62396e9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	pick 67dfa1751 arm64: errata: Add -mpc-relative-literal-loads
	in arch/arm64/Makefile
---
 arch/arm64/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b6c90e5006e4..548a2939d7e6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -28,6 +28,7 @@ endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
 KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
+KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)

From 7c584b74f039645457bb762f5171e2de515720e4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:55 +0000
Subject: [PATCH 721/797] asm-generic: Fix local variable shadow in
 __set_fixmap_offset

Currently __set_fixmap_offset is a macro function which has a local
variable called 'addr'. If a caller passes a 'phys' parameter which is
derived from a variable also called 'addr', the local variable will
shadow this, and the compiler will complain about the use of an
uninitialized variable. To avoid the issue with namespace clashes,
'addr' is prefixed with a liberal sprinkling of underscores.

Turning __set_fixmap_offset into a static inline breaks the build for
several architectures. Fixing this properly requires updates to a number
of architectures to make them agree on the prototype of __set_fixmap (it
could be done as a subsequent patch series).

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
[catalin.marinas@arm.com: squashed the original function patch and macro fixup]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 3694bd76781b76c4f8d2ecd85018feeb1609f0e5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 include/asm-generic/fixmap.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
index 1cbb8338edf3..827e4d3bbc7a 100644
--- a/include/asm-generic/fixmap.h
+++ b/include/asm-generic/fixmap.h
@@ -70,12 +70,12 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
 #endif
 
 /* Return a pointer with offset calculated */
-#define __set_fixmap_offset(idx, phys, flags)		      \
-({							      \
-	unsigned long addr;				      \
-	__set_fixmap(idx, phys, flags);			      \
-	addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
-	addr;						      \
+#define __set_fixmap_offset(idx, phys, flags)				\
+({									\
+	unsigned long ________addr;					\
+	__set_fixmap(idx, phys, flags);					\
+	________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1));	\
+	________addr;							\
 })
 
 #define set_fixmap_offset(idx, phys) \

From c8403d828ed9741a4a9d820c829d71211685b659 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:56 +0000
Subject: [PATCH 722/797] arm64: mm: specialise pagetable allocators

We pass a size parameter to early_alloc and late_alloc, but these are
only ever used to allocate single pages. In late_alloc we always
allocate a single page.

Both allocators provide us with zeroed pages (such that all entries are
invalid), but we have no barriers between allocating a page and adding
that page to existing (live) tables. A concurrent page table walk may
see stale data, leading to a number of issues.

This patch specialises the two allocators for page tables. The size
parameter is removed and the necessary dsb(ishst) is folded into each.
To make it clear that the functions are intended for use for page table
allocation, they are renamed to {early,late}_pgtable_alloc, with the
related function pointer renamed to pgtable_alloc.

As the dsb(ishst) is now in the allocator, the existing barrier for the
zero page is redundant and thus is removed. The previously missing
include of barrier.h is added.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 21ab99c289d350f4ae454bc069870009db6df20e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 52 +++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c5bd5bca8e3d..3ed128c96618 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 
+#include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
 #include <asm/kernel-pgtable.h>
@@ -62,15 +63,18 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static void __init *early_alloc(unsigned long sz)
+static void __init *early_pgtable_alloc(void)
 {
 	phys_addr_t phys;
 	void *ptr;
 
-	phys = memblock_alloc(sz, sz);
+	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	BUG_ON(!phys);
 	ptr = __va(phys);
-	memset(ptr, 0, sz);
+	memset(ptr, 0, PAGE_SIZE);
+
+	/* Ensure the zeroed page is visible to the page table walker */
+	dsb(ishst);
 	return ptr;
 }
 
@@ -95,12 +99,12 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
 				  pgprot_t prot,
-				  void *(*alloc)(unsigned long size))
+				  void *(*pgtable_alloc)(void))
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+		pte = pgtable_alloc();
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
 		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
@@ -130,7 +134,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*alloc)(unsigned long size))
+				  void *(*pgtable_alloc)(void))
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -139,7 +143,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		pmd = pgtable_alloc();
 		if (pud_sect(*pud)) {
 			/*
 			 * need to have the 1G of mappings continue to be
@@ -174,7 +178,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			}
 		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot, alloc);
+				       prot, pgtable_alloc);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
@@ -195,13 +199,13 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*alloc)(unsigned long size))
+				  void *(*pgtable_alloc)(void))
 {
 	pud_t *pud;
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
+		pud = pgtable_alloc();
 		pgd_populate(mm, pgd, pud);
 	}
 	BUG_ON(pgd_bad(*pgd));
@@ -234,7 +238,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				}
 			}
 		} else {
-			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+			alloc_init_pmd(mm, pud, addr, next, phys, prot,
+				       pgtable_alloc);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
@@ -247,7 +252,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 				    phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
-				    void *(*alloc)(unsigned long size))
+				    void *(*pgtable_alloc)(void))
 {
 	unsigned long addr, length, end, next;
 
@@ -265,18 +270,18 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+		alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void *late_alloc(unsigned long size)
+static void *late_pgtable_alloc(void)
 {
-	void *ptr;
-
-	BUG_ON(size > PAGE_SIZE);
-	ptr = (void *)__get_free_page(PGALLOC_GFP);
+	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
 	BUG_ON(!ptr);
+
+	/* Ensure the zeroed page is visible to the page table walker */
+	dsb(ishst);
 	return ptr;
 }
 
@@ -289,7 +294,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 	__create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
-			 size, prot, early_alloc);
+			 size, prot, early_pgtable_alloc);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
@@ -297,7 +302,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       pgprot_t prot)
 {
 	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
-				late_alloc);
+				late_pgtable_alloc);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -310,7 +315,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 	}
 
 	return __create_mapping(&init_mm, pgd_offset_k(virt),
-				phys, virt, size, prot, late_alloc);
+				phys, virt, size, prot, late_pgtable_alloc);
 }
 
 #ifdef CONFIG_DEBUG_RODATA
@@ -458,15 +463,12 @@ void __init paging_init(void)
 	fixup_executable();
 
 	/* allocate the zero page. */
-	zero_page = early_alloc(PAGE_SIZE);
+	zero_page = early_pgtable_alloc();
 
 	bootmem_init();
 
 	empty_zero_page = virt_to_page(zero_page);
 
-	/* Ensure the zero page is visible to the page table walker */
-	dsb(ishst);
-
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.

From a4593c91bbb59e89df1aefe677493ff364a7ddb2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:57 +0000
Subject: [PATCH 723/797] arm64: mm: place empty_zero_page in bss

Currently the zero page is set up in paging_init, and thus we cannot use
the zero page earlier. We use the zero page as a reserved TTBR value
from which no TLB entries may be allocated (e.g. when uninstalling the
idmap). To enable such usage earlier (as may be required for invasive
changes to the kernel page tables), and to minimise the time that the
idmap is active, we need to be able to use the zero page before
paging_init.

This patch follows the example set by x86, by allocating the zero page
at compile time, in .bss. This means that the zero page itself is
available immediately upon entry to start_kernel (as we zero .bss before
this), and also means that the zero page takes up no space in the raw
Image binary. The associated struct page is allocated in bootmem_init,
and remains unavailable until this time.

Outside of arch code, the only users of empty_zero_page assume that the
empty_zero_page symbol refers to the zeroed memory itself, and that
ZERO_PAGE(x) must be used to acquire the associated struct page,
following the example of x86. This patch also brings arm64 in line with
these assumptions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 5227cfa71f9e8574373f4d0e9e754942d76cdf67)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 2 +-
 arch/arm64/include/asm/pgtable.h     | 4 ++--
 arch/arm64/kernel/head.S             | 1 +
 arch/arm64/mm/mmu.c                  | 9 +--------
 4 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 24165784b803..600eacb9f7d5 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -48,7 +48,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
  */
 static inline void cpu_set_reserved_ttbr0(void)
 {
-	unsigned long ttbr = page_to_phys(empty_zero_page);
+	unsigned long ttbr = virt_to_phys(empty_zero_page);
 
 	asm(
 	"	msr	ttbr0_el1, %0			// set TTBR0\n"
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2daf88970731..8a76e603d737 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -123,8 +123,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr)	(empty_zero_page)
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 917d98108b3f..53b9f9f128c2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -421,6 +421,7 @@ __mmap_switched:
 	adr_l	x2, __bss_stop
 	sub	x2, x2, x0
 	bl	__pi_memset
+	dsb	ishst				// Make zero page visible to PTW
 
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3ed128c96618..e4932aa6c6e9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -49,7 +49,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
  */
-struct page *empty_zero_page;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -457,18 +457,11 @@ void fixup_init(void)
  */
 void __init paging_init(void)
 {
-	void *zero_page;
-
 	map_mem();
 	fixup_executable();
 
-	/* allocate the zero page. */
-	zero_page = early_pgtable_alloc();
-
 	bootmem_init();
 
-	empty_zero_page = virt_to_page(zero_page);
-
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.

From 0dedc3948e315aff0f382a58c0d387547d8a35b5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:58 +0000
Subject: [PATCH 724/797] arm64: unify idmap removal

We currently open-code the removal of the idmap and restoration of the
current task's MMU state in a few places.

Before introducing yet more copies of this sequence, unify these to call
a new helper, cpu_uninstall_idmap.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 9e8e865bbe294a69666a1996bda3e87825b258c0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 25 +++++++++++++++++++++++++
 arch/arm64/kernel/setup.c            |  1 +
 arch/arm64/kernel/smp.c              |  4 +---
 arch/arm64/kernel/suspend.c          | 20 ++++----------------
 arch/arm64/mm/mmu.c                  |  4 +---
 5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 600eacb9f7d5..b1b2514d8883 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,6 +27,7 @@
 #include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
@@ -89,6 +90,30 @@ static inline void cpu_set_default_tcr_t0sz(void)
 	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
+/*
+ * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
+ *
+ * The idmap lives in the same VA range as userspace, but uses global entries
+ * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
+ * speculative TLB fetches, we must temporarily install the reserved page
+ * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
+ *
+ * If current is a not a user task, the mm covers the TTBR1_EL1 page tables,
+ * which should not be installed in TTBR0_EL1. In this case we can leave the
+ * reserved page tables in place.
+ */
+static inline void cpu_uninstall_idmap(void)
+{
+	struct mm_struct *mm = current->active_mm;
+
+	cpu_set_reserved_ttbr0();
+	local_flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
+
+	if (mm != &init_mm)
+		cpu_switch_mm(mm->pgd, mm);
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479147db..f6621ba071f9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -62,6 +62,7 @@
 #include <asm/memblock.h>
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/mmu_context.h>
 
 phys_addr_t __fdt_pointer __initdata;
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..68e7f79630d4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -149,9 +149,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	cpu_set_default_tcr_t0sz();
+	cpu_uninstall_idmap();
 
 	preempt_disable();
 	trace_hardirqs_off();
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 1095aa483a1c..66055392f445 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
  */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
-	struct mm_struct *mm = current->active_mm;
 	int ret;
 	unsigned long flags;
 
@@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	ret = __cpu_suspend_enter(arg, fn);
 	if (ret == 0) {
 		/*
-		 * We are resuming from reset with TTBR0_EL1 set to the
-		 * idmap to enable the MMU; set the TTBR0 to the reserved
-		 * page tables to prevent speculative TLB allocations, flush
-		 * the local tlb and set the default tcr_el1.t0sz so that
-		 * the TTBR0 address space set-up is properly restored.
-		 * If the current active_mm != &init_mm we entered cpu_suspend
-		 * with mappings in TTBR0 that must be restored, so we switch
-		 * them back to complete the address space configuration
-		 * restoration before returning.
+		 * We are resuming from reset with the idmap active in TTBR0_EL1.
+		 * We must uninstall the idmap and restore the expected MMU
+		 * state before we can possibly return to userspace.
 		 */
-		cpu_set_reserved_ttbr0();
-		local_flush_tlb_all();
-		cpu_set_default_tcr_t0sz();
-
-		if (mm != &init_mm)
-			cpu_switch_mm(mm->pgd, mm);
+		cpu_uninstall_idmap();
 
 		/*
 		 * Restore per-cpu offset before any kernel
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e4932aa6c6e9..dcc06b23b37f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -466,9 +466,7 @@ void __init paging_init(void)
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	cpu_set_default_tcr_t0sz();
+	cpu_uninstall_idmap();
 }
 
 /*

From 7ed029beef4adcab0ad59356579f6fea71655895 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:59 +0000
Subject: [PATCH 725/797] arm64: unmap idmap earlier

During boot we leave the idmap in place until paging_init, as we
previously had to wait for the zero page to become allocated and
accessible.

Now that we have a statically-allocated zero page, we can uninstall the
idmap much earlier in the boot process, making it far easier to spot
accidental use of physical addresses. This also brings the cold boot
path in line with the secondary boot path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 86ccce896cb0aa800a7a6dcd29b41ffc4eeb1a75)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/setup.c | 6 ++++++
 arch/arm64/mm/mmu.c       | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f6621ba071f9..cfed56f0ad26 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -314,6 +314,12 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	local_async_enable();
 
+	/*
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
+	 */
+	cpu_uninstall_idmap();
+
 	efi_init();
 	arm64_memblock_init();
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index dcc06b23b37f..8587ed9d81b6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -461,12 +461,6 @@ void __init paging_init(void)
 	fixup_executable();
 
 	bootmem_init();
-
-	/*
-	 * TTBR0 is only used for the identity mapping at this stage. Make it
-	 * point to zero page to avoid speculatively fetching new entries.
-	 */
-	cpu_uninstall_idmap();
 }
 
 /*

From 34903bb8c69405ec6eb2b2d437fabd0571df94bb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:00 +0000
Subject: [PATCH 726/797] arm64: add function to install the idmap

In some cases (e.g. when making invasive changes to the kernel page
tables) we will need to execute code from the idmap.

Add a new helper which may be used to install the idmap, complementing
the existing cpu_uninstall_idmap.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 609116d202a8c5fd3fe393eb85373cbee906df68)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index b1b2514d8883..944f2730a940 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -74,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void)
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
-static inline void cpu_set_default_tcr_t0sz(void)
+static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
 {
 	unsigned long tcr;
 
@@ -87,9 +87,12 @@ static inline void cpu_set_default_tcr_t0sz(void)
 	"	msr	tcr_el1, %0	;"
 	"	isb"
 	: "=&r" (tcr)
-	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+	: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
+#define cpu_set_default_tcr_t0sz()	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
+#define cpu_set_idmap_tcr_t0sz()	__cpu_set_tcr_t0sz(idmap_t0sz)
+
 /*
  * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
  *
@@ -114,6 +117,15 @@ static inline void cpu_uninstall_idmap(void)
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+static inline void cpu_install_idmap(void)
+{
+	cpu_set_reserved_ttbr0();
+	local_flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously

From 34fc059805c7dfa19d0d9d1e008fe83f7744c0ed Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:01 +0000
Subject: [PATCH 727/797] arm64: mm: add code to safely replace TTBR1_EL1

If page tables are modified without suitable TLB maintenance, the ARM
architecture permits multiple TLB entries to be allocated for the same
VA. When this occurs, it is permitted that TLB conflict aborts are
raised in response to synchronous data/instruction accesses, and/or an
amalgamation of the TLB entries may be used as a result of a TLB lookup.

The presence of conflicting TLB entries may result in a variety of
behaviours detrimental to the system (e.g. erroneous physical addresses
may be used by I-cache fetches and/or page table walks). Some of these
cases may result in unexpected changes of hardware state, and/or result
in the (asynchronous) delivery of SError.

To avoid these issues, we must avoid situations where conflicting
entries may be allocated into TLBs. For user and module mappings we can
follow a strict break-before-make approach, but this cannot work for
modifications to the swapper page tables that cover the kernel text and
data.

Instead, this patch adds code which is intended to be executed from the
idmap, which can safely unmap the swapper page tables as it only
requires the idmap to be active. This enables us to uninstall the active
TTBR1_EL1 entry, invalidate TLBs, then install a new TTBR1_EL1 entry
without potentially unmapping code or data required for the sequence.
This avoids the risk of conflict, but requires that updates are staged
in a copy of the swapper page tables prior to being installed.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 50e1881ddde2a986c7d0d2150985239e5e3d7d96)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 19 +++++++++++++++++++
 arch/arm64/mm/proc.S                 | 28 ++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 944f2730a940..a00f7cf35bbd 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -126,6 +126,25 @@ static inline void cpu_install_idmap(void)
 	cpu_switch_mm(idmap_pg_dir, &init_mm);
 }
 
+/*
+ * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
+ * avoiding the possibility of conflicting TLB entries being allocated.
+ */
+static inline void cpu_replace_ttbr1(pgd_t *pgd)
+{
+	typedef void (ttbr_replace_func)(phys_addr_t);
+	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
+	ttbr_replace_func *replace_phys;
+
+	phys_addr_t pgd_phys = virt_to_phys(pgd);
+
+	replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+
+	cpu_install_idmap();
+	replace_phys(pgd_phys);
+	cpu_uninstall_idmap();
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c164d2cb35c0..0c19534a901e 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -140,6 +140,34 @@ ENTRY(cpu_do_switch_mm)
 	ret
 ENDPROC(cpu_do_switch_mm)
 
+	.pushsection ".idmap.text", "ax"
+/*
+ * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
+ *
+ * This is the low-level counterpart to cpu_replace_ttbr1, and should not be
+ * called by anything else. It can only be executed from a TTBR0 mapping.
+ */
+ENTRY(idmap_cpu_replace_ttbr1)
+	mrs	x2, daif
+	msr	daifset, #0xf
+
+	adrp	x1, empty_zero_page
+	msr	ttbr1_el1, x1
+	isb
+
+	tlbi	vmalle1
+	dsb	nsh
+	isb
+
+	msr	ttbr1_el1, x0
+	isb
+
+	msr	daif, x2
+
+	ret
+ENDPROC(idmap_cpu_replace_ttbr1)
+	.popsection
+
 /*
  *	__cpu_setup
  *

From 8b8513de45f823e10fce3b1563953be70483941b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:02 +0000
Subject: [PATCH 728/797] arm64: kasan: avoid TLB conflicts

The page table modification performed during the KASAN init risks the
allocation of conflicting TLB entries, as it swaps a set of valid global
entries for another without suitable TLB maintenance.

The presence of conflicting TLB entries can result in the delivery of
synchronous TLB conflict aborts, or may result in the use of erroneous
data being returned in response to a TLB lookup. This can affect
explicit data accesses from software as well as translations performed
asynchronously (e.g. as part of page table walks or speculative I-cache
fetches), and can therefore result in a wide variety of problems.

To avoid this, use cpu_replace_ttbr1 to swap the page tables. This
ensures that when the new tables are installed there are no stale
entries from the old tables which may conflict. As all updates are made
to the tables while they are not active, the updates themselves are
safe.

At the same time, add the missing barrier to ensure that the tmp_pg_dir
entries updated via memcpy are visible to the page table walkers at the
point the tmp_pg_dir is installed. All other page table updates made as
part of KASAN initialisation have the requisite barriers due to the use
of the standard page table accessors.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c1a88e9124a499939ebd8069d5e4d3937f019157)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cab7a5be40aa..263b59020500 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -16,6 +16,7 @@
 #include <linux/memblock.h>
 #include <linux/start_kernel.h>
 
+#include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -108,15 +109,6 @@ static void __init clear_pgds(unsigned long start,
 		set_pgd(pgd_offset_k(start), __pgd(0));
 }
 
-static void __init cpu_set_ttbr1(unsigned long ttbr1)
-{
-	asm(
-	"	msr	ttbr1_el1, %0\n"
-	"	isb"
-	:
-	: "r" (ttbr1));
-}
-
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
@@ -130,8 +122,8 @@ void __init kasan_init(void)
 	 * setup will be finished.
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
-	cpu_set_ttbr1(__pa(tmp_pg_dir));
-	flush_tlb_all();
+	dsb(ishst);
+	cpu_replace_ttbr1(tmp_pg_dir);
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -165,8 +157,7 @@ void __init kasan_init(void)
 			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
 
 	memset(kasan_zero_page, 0, PAGE_SIZE);
-	cpu_set_ttbr1(__pa(swapper_pg_dir));
-	flush_tlb_all();
+	cpu_replace_ttbr1(swapper_pg_dir);
 
 	/* At this point kasan is fully initialized. Enable error messages */
 	init_task.kasan_depth = 0;

From febef18a360df3f8b4b364167e8fe050b24ccf65 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:03 +0000
Subject: [PATCH 729/797] arm64: mm: move pte_* macros

For pmd, pud, and pgd levels of table, functions including p?d_index and
p?d_offset are defined after the p?d_page_vaddr function for the
immediately higher level of table.

The pte functions however are defined much earlier, even though several
rely on the later definition of pmd_page_vaddr. While this isn't
currently a problem as these are macros, it prevents the logical
grouping of later C functions (which cannot rely on prototypes for
functions not yet defined).

Move these definitions after pmd_page_vaddr, for consistency with the
placement of these functions for other levels of table.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 053520f7d3923cc6d37afb28f9887cb1e7d77454)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 8a76e603d737..d439523a7910 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -136,16 +136,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
 #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
 
-/* Find an entry in the third-level page table. */
-#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
-#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
-
-#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte)			do { } while (0)
-#define pte_unmap_nested(pte)		do { } while (0)
-
 /*
  * The following only work if pte_present(). Undefined behaviour otherwise.
  */
@@ -447,6 +437,16 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
 }
 
+/* Find an entry in the third-level page table. */
+#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
+
+#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte)			do { } while (0)
+#define pte_unmap_nested(pte)		do { } while (0)
+
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
 /*

From 77ea11473be30e0b90b32deb650b6403ad291a12 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:04 +0000
Subject: [PATCH 730/797] arm64: mm: add functions to walk page tables by PA

To allow us to walk tables allocated into the fixmap, we need to acquire
the physical address of a page, rather than the virtual address in the
linear map.

This patch adds new p??_page_paddr and p??_offset_phys functions to
acquire the physical address of a next-level table, and changes
p??_offset* into macros which simply convert this to a linear map VA.
This renders p??_page_vaddr unused, and hence they are removed.

At the pgd level, a new pgd_offset_raw function is added to find the
relevant PGD entry given the base of a PGD and a virtual address.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit dca56dca7124709f3dfca81afe61b4d98eb9cacf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 39 +++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d439523a7910..db608e7984c6 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -432,15 +432,16 @@ static inline void pmd_clear(pmd_t *pmdp)
 	set_pmd(pmdp, __pmd(0));
 }
 
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 {
-	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+	return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
+#define pte_offset_phys(dir,addr)	(pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_kernel(dir,addr)	((pte_t *)__va(pte_offset_phys((dir), (addr))))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
 #define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
@@ -475,21 +476,23 @@ static inline void pud_clear(pud_t *pudp)
 	set_pud(pudp, __pud(0));
 }
 
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline phys_addr_t pud_page_paddr(pud_t pud)
 {
-	return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+	return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
-	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
+#define pmd_offset_phys(dir, addr)	(pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
+#else
+
+#define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
+
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -511,21 +514,23 @@ static inline void pgd_clear(pgd_t *pgdp)
 	set_pgd(pgdp, __pgd(0));
 }
 
-static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 {
-	return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+	return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the frst-level page table. */
 #define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
-{
-	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
-}
+#define pud_offset_phys(dir, addr)	(pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
+#else
+
+#define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -533,7 +538,9 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 /* to find an entry in a page-table-directory */
 #define pgd_index(addr)		(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 
-#define pgd_offset(mm, addr)	((mm)->pgd+pgd_index(addr))
+#define pgd_offset_raw(pgd, addr)	((pgd) + pgd_index(addr))
+
+#define pgd_offset(mm, addr)	(pgd_offset_raw((mm)->pgd, (addr)))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)

From 6f74a379d1593fe728141747c6c594758cc5d328 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:05 +0000
Subject: [PATCH 731/797] arm64: mm: avoid redundant __pa(__va(x))

When we "upgrade" to a section mapping, we free any table we made
redundant by giving it back to memblock. To get the PA, we acquire the
physical address and convert this to a VA, then subsequently convert
this back to a PA.

This works currently, but will not work if the tables are not accessed
via linear map VAs (e.g. if we use fixmap slots).

This patch uses {pmd,pud}_page_paddr to acquire the PA. This avoids the
__pa(__va()) round trip, saving some work and avoiding reliance on the
linear mapping.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 316b39db06718d59d82736df9fc65cf05b467cc7)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8587ed9d81b6..0b6c8727bcd1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -171,7 +171,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			if (!pmd_none(old_pmd)) {
 				flush_tlb_all();
 				if (pmd_table(old_pmd)) {
-					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+					phys_addr_t table = pmd_page_paddr(old_pmd);
 					if (!WARN_ON_ONCE(slab_is_available()))
 						memblock_free(table, PAGE_SIZE);
 				}
@@ -232,7 +232,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 			if (!pud_none(old_pud)) {
 				flush_tlb_all();
 				if (pud_table(old_pud)) {
-					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+					phys_addr_t table = pud_page_paddr(old_pud);
 					if (!WARN_ON_ONCE(slab_is_available()))
 						memblock_free(table, PAGE_SIZE);
 				}

From 0061f781a0fe83adfe76f78f89653cbd41249a93 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:06 +0000
Subject: [PATCH 732/797] arm64: mm: add __{pud,pgd}_populate

We currently have __pmd_populate for creating a pmd table entry given
the physical address of a pte, but don't have equivalents for the pud or
pgd levels of table.

To enable us to manipulate tables which are mapped outside of the linear
mapping (where we have a PA, but not a linear map VA), it is useful to
have these functions.

This patch adds __{pud,pgd}_populate. As these should not be called when
the kernel uses folded {pmd,pud}s, in these cases they expand to
BUILD_BUG(). So long as the appropriate checks are made on the {pud,pgd}
entry prior to attempting population, these should be optimized out at
compile time.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 1e531cce68c92b46c7d29f36a72f9a3e5886678f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgalloc.h | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index c15053902942..ff98585d085a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	free_page((unsigned long)pmd);
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
 {
-	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+	set_pud(pud, __pud(pmd | prot));
 }
 
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	__pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+}
+#else
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+{
+	BUILD_BUG();
+}
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 	free_page((unsigned long)pud);
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
 {
-	set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
+	set_pgd(pgdp, __pgd(pud | prot));
 }
 
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	__pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+}
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+{
+	BUILD_BUG();
+}
 #endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);

From 9568281b80e91974c04f6aaac50ecfd4dcf31df1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:07 +0000
Subject: [PATCH 733/797] arm64: mm: add functions to walk tables in fixmap

As a preparatory step to allow us to allocate early page tables from
unmapped memory using memblock_alloc, add new p??_{set,clear}_fixmap*
functions which can be used to walk page tables outside of the linear
mapping by using fixmap slots.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 961faac114819a01e627fe9c9c82b830bb3849d4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/fixmap.h  | 10 ++++++++++
 arch/arm64/include/asm/pgtable.h | 26 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 309704544d22..1a617d46fce9 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,6 +62,16 @@ enum fixed_addresses {
 
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
+	/*
+	 * Used for kernel page table creation, so unmapped memory may be used
+	 * for tables.
+	 */
+	FIX_PTE,
+	FIX_PMD,
+	FIX_PUD,
+	FIX_PGD,
+
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index db608e7984c6..c99dfc588deb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -59,6 +59,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/fixmap.h>
 #include <linux/mmdebug.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
@@ -448,6 +449,10 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 #define pte_unmap(pte)			do { } while (0)
 #define pte_unmap_nested(pte)		do { } while (0)
 
+#define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+#define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap()		clear_fixmap(FIX_PTE)
+
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
 /*
@@ -487,12 +492,21 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #define pmd_offset_phys(dir, addr)	(pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
+#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
+
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
 #else
 
 #define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
 
+/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
+#define pmd_set_fixmap(addr)		NULL
+#define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
+#define pmd_clear_fixmap()
+
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -525,12 +539,21 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pud_offset_phys(dir, addr)	(pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
+#define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))
+#define pud_set_fixmap_offset(pgd, addr)	pud_set_fixmap(pud_offset_phys(pgd, addr))
+#define pud_clear_fixmap()		clear_fixmap(FIX_PUD)
+
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
 #else
 
 #define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
 
+/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
+#define pud_set_fixmap(addr)		NULL
+#define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
+#define pud_clear_fixmap()
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -545,6 +568,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
 
+#define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
+#define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |

From c790554bd459b56042e784162b5017c41120538e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:08 +0000
Subject: [PATCH 734/797] arm64: mm: use fixmap when creating page tables

As a preparatory step to allow us to allocate early page tables from
unmapped memory using memblock_alloc, modify the __create_mapping
callees to map and unmap the tables they modify using fixmap entries.

All but the top-level pgd initialisation is performed via the fixmap.
Subsequent patches will inject the pgd physical address, and migrate to
using the FIX_PGD slot.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f4710445458c0a1bd1c3c014ada2e7d7dc7b882f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 61 ++++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0b6c8727bcd1..4f5a5fa3f8f4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -63,19 +63,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static void __init *early_pgtable_alloc(void)
+static phys_addr_t __init early_pgtable_alloc(void)
 {
 	phys_addr_t phys;
 	void *ptr;
 
 	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	BUG_ON(!phys);
-	ptr = __va(phys);
+
+	/*
+	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+	 * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+	 * any level of table.
+	 */
+	ptr = pte_set_fixmap(phys);
+
 	memset(ptr, 0, PAGE_SIZE);
 
-	/* Ensure the zeroed page is visible to the page table walker */
-	dsb(ishst);
-	return ptr;
+	/*
+	 * Implicit barriers also ensure the zeroed page is visible to the page
+	 * table walker
+	 */
+	pte_clear_fixmap();
+
+	return phys;
 }
 
 /*
@@ -99,24 +110,28 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
 				  pgprot_t prot,
-				  void *(*pgtable_alloc)(void))
+				  phys_addr_t (*pgtable_alloc)(void))
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		pte = pgtable_alloc();
+		phys_addr_t pte_phys = pgtable_alloc();
+		pte = pte_set_fixmap(pte_phys);
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
-		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
 		flush_tlb_all();
+		pte_clear_fixmap();
 	}
 	BUG_ON(pmd_bad(*pmd));
 
-	pte = pte_offset_kernel(pmd, addr);
+	pte = pte_set_fixmap_offset(pmd, addr);
 	do {
 		set_pte(pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	pte_clear_fixmap();
 }
 
 static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -134,7 +149,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*pgtable_alloc)(void))
+				  phys_addr_t (*pgtable_alloc)(void))
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -143,7 +158,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		pmd = pgtable_alloc();
+		phys_addr_t pmd_phys = pgtable_alloc();
+		pmd = pmd_set_fixmap(pmd_phys);
 		if (pud_sect(*pud)) {
 			/*
 			 * need to have the 1G of mappings continue to be
@@ -151,12 +167,13 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			 */
 			split_pud(pud, pmd);
 		}
-		pud_populate(mm, pud, pmd);
+		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
 		flush_tlb_all();
+		pmd_clear_fixmap();
 	}
 	BUG_ON(pud_bad(*pud));
 
-	pmd = pmd_offset(pud, addr);
+	pmd = pmd_set_fixmap_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
@@ -182,6 +199,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
+
+	pmd_clear_fixmap();
 }
 
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -199,18 +218,18 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*pgtable_alloc)(void))
+				  phys_addr_t (*pgtable_alloc)(void))
 {
 	pud_t *pud;
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		pud = pgtable_alloc();
-		pgd_populate(mm, pgd, pud);
+		phys_addr_t pud_phys = pgtable_alloc();
+		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
 	}
 	BUG_ON(pgd_bad(*pgd));
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_set_fixmap_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 
@@ -243,6 +262,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
+
+	pud_clear_fixmap();
 }
 
 /*
@@ -252,7 +273,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 				    phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
-				    void *(*pgtable_alloc)(void))
+				    phys_addr_t (*pgtable_alloc)(void))
 {
 	unsigned long addr, length, end, next;
 
@@ -275,14 +296,14 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void *late_pgtable_alloc(void)
+static phys_addr_t late_pgtable_alloc(void)
 {
 	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
 	BUG_ON(!ptr);
 
 	/* Ensure the zeroed page is visible to the page table walker */
 	dsb(ishst);
-	return ptr;
+	return __pa(ptr);
 }
 
 static void __init create_mapping(phys_addr_t phys, unsigned long virt,

From 8f64994ff3068b62b867188a12e366ee237da3b5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:09 +0000
Subject: [PATCH 735/797] arm64: mm: allocate pagetables anywhere

Now that create_mapping uses fixmap slots to modify pte, pmd, and pud
entries, we can access page tables anywhere in physical memory,
regardless of the extent of the linear mapping.

Given that, we no longer need to limit memblock allocations during page
table creation, and can leave the limit as its default
MEMBLOCK_ALLOC_ANYWHERE.

We never add memory which will fall outside of the linear map range
given phys_offset and MAX_MEMBLOCK_ADDR are configured appropriately, so
any tables we create will fall in the linear map of the final tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit cdef5f6e9e0e5ee397759b664a9f875ff59ccf01)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 35 -----------------------------------
 1 file changed, 35 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4f5a5fa3f8f4..d50535a07c6e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -384,20 +384,6 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
 static void __init map_mem(void)
 {
 	struct memblock_region *reg;
-	phys_addr_t limit;
-
-	/*
-	 * Temporarily limit the memblock range. We need to do this as
-	 * create_mapping requires puds, pmds and ptes to be allocated from
-	 * memory addressable from the initial direct kernel mapping.
-	 *
-	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
-	 * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
-	 * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
-	 * per Documentation/arm64/booting.txt).
-	 */
-	limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
-	memblock_set_current_limit(limit);
 
 	/* map all the memory banks */
 	for_each_memblock(memory, reg) {
@@ -407,29 +393,8 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-		if (ARM64_SWAPPER_USES_SECTION_MAPS) {
-			/*
-			 * For the first memory bank align the start address and
-			 * current memblock limit to prevent create_mapping() from
-			 * allocating pte page tables from unmapped memory. With
-			 * the section maps, if the first block doesn't end on section
-			 * size boundary, create_mapping() will try to allocate a pte
-			 * page, which may be returned from an unmapped area.
-			 * When section maps are not used, the pte page table for the
-			 * current limit is already present in swapper_pg_dir.
-			 */
-			if (start < limit)
-				start = ALIGN(start, SECTION_SIZE);
-			if (end < limit) {
-				limit = end & SECTION_MASK;
-				memblock_set_current_limit(limit);
-			}
-		}
 		__map_memblock(start, end);
 	}
-
-	/* Limit no longer required. */
-	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
 static void __init fixup_executable(void)

From 55ce0af58717f7651d5a393f687f2ea9a109e4f5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:10 +0000
Subject: [PATCH 736/797] arm64: mm: allow passing a pgdir to alloc_init_*

To allow us to initialise pgdirs which are fixmapped, allow explicitly
passing a pgdir rather than an mm. A new __create_pgd_mapping function
is added for this, with existing __create_mapping callers migrated to
this.

The mm argument was previously only used at the top level. Now that it
is redundant at all levels, it is removed. To indicate its new found
similarity to alloc_init_{pud,pmd,pte}, __create_mapping is renamed to
init_pgd.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 11509a306bb6ea595878b2d246d2d56b1783e040)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d50535a07c6e..570ba3e3d362 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -146,8 +146,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 	} while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
-static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
-				  unsigned long addr, unsigned long end,
+static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void))
 {
@@ -215,8 +214,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 	return true;
 }
 
-static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
-				  unsigned long addr, unsigned long end,
+static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void))
 {
@@ -257,7 +255,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				}
 			}
 		} else {
-			alloc_init_pmd(mm, pud, addr, next, phys, prot,
+			alloc_init_pmd(pud, addr, next, phys, prot,
 				       pgtable_alloc);
 		}
 		phys += next - addr;
@@ -270,8 +268,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
-				    phys_addr_t phys, unsigned long virt,
+static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
 				    phys_addr_t (*pgtable_alloc)(void))
 {
@@ -291,7 +288,7 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc);
+		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
@@ -306,6 +303,14 @@ static phys_addr_t late_pgtable_alloc(void)
 	return __pa(ptr);
 }
 
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+				 unsigned long virt, phys_addr_t size,
+				 pgprot_t prot,
+				 phys_addr_t (*alloc)(void))
+{
+	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+}
+
 static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 				  phys_addr_t size, pgprot_t prot)
 {
@@ -314,16 +319,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
-			 size, prot, early_pgtable_alloc);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+			     early_pgtable_alloc);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
 			       pgprot_t prot)
 {
-	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
-				late_pgtable_alloc);
+	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+			     late_pgtable_alloc);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -335,8 +340,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 
-	return __create_mapping(&init_mm, pgd_offset_k(virt),
-				phys, virt, size, prot, late_pgtable_alloc);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+			     late_pgtable_alloc);
 }
 
 #ifdef CONFIG_DEBUG_RODATA

From 0060e7a78b1a3b208d178b285fb3912f4fd4d9ee Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:11 +0000
Subject: [PATCH 737/797] arm64: ensure _stext and _etext are page-aligned

Currently we have separate ALIGN_DEBUG_RO{,_MIN} directives to align
_etext and __init_begin. While we ensure that __init_begin is
page-aligned, we do not provide the same guarantee for _etext. This is
not problematic currently as the alignment of __init_begin is sufficient
to prevent issues when we modify permissions.

Subsequent patches will assume page alignment of segments of the kernel
we wish to map with different permissions. To ensure this, move _etext
after the ALIGN_DEBUG_RO_MIN for the init section. This renders the
prior ALIGN_DEBUG_RO irrelevant, and hence it is removed. Likewise,
upgrade to ALIGN_DEBUG_RO_MIN(PAGE_SIZE) for _stext.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit fca082bfb543ccaaff864fc0892379ccaa1711cd)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e3928f578891..b78a3c772294 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -95,7 +95,7 @@ SECTIONS
 		_text = .;
 		HEAD_TEXT
 	}
-	ALIGN_DEBUG_RO
+	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -116,10 +116,9 @@ SECTIONS
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
-	ALIGN_DEBUG_RO
-	_etext = .;			/* End of text and rodata section */
 
 	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	_etext = .;			/* End of text and rodata section */
 	__init_begin = .;
 
 	INIT_TEXT_SECTION(8)

From a8a81d65140e0bdfe27e53ae3529546b5dce6bd4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:12 +0000
Subject: [PATCH 738/797] arm64: mm: create new fine-grained mappings at boot

At boot we may change the granularity of the tables mapping the kernel
(by splitting or making sections). This may happen when we create the
linear mapping (in __map_memblock), or at any point we try to apply
fine-grained permissions to the kernel (e.g. fixup_executable,
mark_rodata_ro, fixup_init).

Changing the active page tables in this manner may result in multiple
entries for the same address being allocated into TLBs, risking problems
such as TLB conflict aborts or issues derived from the amalgamation of
TLB entries. Generally, a break-before-make (BBM) approach is necessary
to avoid conflicts, but we cannot do this for the kernel tables as it
risks unmapping text or data being used to do so.

Instead, we can create a new set of tables from scratch in the safety of
the existing mappings, and subsequently migrate over to these using the
new cpu_replace_ttbr1 helper, which avoids the two sets of tables being
active simultaneously.

To avoid issues when we later modify permissions of the page tables
(e.g. in fixup_init), we must create the page tables at a granularity
such that later modification does not result in splitting of tables.

This patch applies this strategy, creating a new set of fine-grained
page tables from scratch, and safely migrating to them. The existing
fixmap and kasan shadow page tables are reused in the new fine-grained
tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 068a17a5805dfbca4bbf03e664ca6b19709cc7a8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/kasan.h |   3 +
 arch/arm64/mm/kasan_init.c     |  15 ++++
 arch/arm64/mm/mmu.c            | 155 +++++++++++++++++++--------------
 3 files changed, 110 insertions(+), 63 deletions(-)

diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 2774fa384c47..de0d21211c34 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -7,6 +7,7 @@
 
 #include <linux/linkage.h>
 #include <asm/memory.h>
+#include <asm/pgtable-types.h>
 
 /*
  * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
@@ -28,10 +29,12 @@
 #define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
 
 void kasan_init(void);
+void kasan_copy_shadow(pgd_t *pgdir);
 asmlinkage void kasan_early_init(void);
 
 #else
 static inline void kasan_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *pgdir) { }
 #endif
 
 #endif
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 263b59020500..cc569a38bc76 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -97,6 +97,21 @@ asmlinkage void __init kasan_early_init(void)
 	kasan_map_early_shadow();
 }
 
+/*
+ * Copy the current shadow region into a new pgdir.
+ */
+void __init kasan_copy_shadow(pgd_t *pgdir)
+{
+	pgd_t *pgd, *pgd_new, *pgd_end;
+
+	pgd = pgd_offset_k(KASAN_SHADOW_START);
+	pgd_end = pgd_offset_k(KASAN_SHADOW_END);
+	pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+	do {
+		set_pgd(pgd_new, *pgd);
+	} while (pgd++, pgd_new++, pgd != pgd_end);
+}
+
 static void __init clear_pgds(unsigned long start,
 			unsigned long end)
 {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 570ba3e3d362..4874d2fea1c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -33,6 +33,7 @@
 #include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -344,49 +345,42 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 			     late_pgtable_alloc);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-	/*
-	 * Set up the executable regions using the existing section mappings
-	 * for now. This will get more fine grained later once all memory
-	 * is mapped
-	 */
-	unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
 
-	if (end < kernel_x_start) {
-		create_mapping(start, __phys_to_virt(start),
-			end - start, PAGE_KERNEL);
-	} else if (start >= kernel_x_end) {
-		create_mapping(start, __phys_to_virt(start),
-			end - start, PAGE_KERNEL);
-	} else {
-		if (start < kernel_x_start)
-			create_mapping(start, __phys_to_virt(start),
-				kernel_x_start - start,
-				PAGE_KERNEL);
-		create_mapping(kernel_x_start,
-				__phys_to_virt(kernel_x_start),
-				kernel_x_end - kernel_x_start,
-				PAGE_KERNEL_EXEC);
-		if (kernel_x_end < end)
-			create_mapping(kernel_x_end,
-				__phys_to_virt(kernel_x_end),
-				end - kernel_x_end,
-				PAGE_KERNEL);
+	unsigned long kernel_start = __pa(_stext);
+	unsigned long kernel_end = __pa(_end);
+
+	/*
+	 * The kernel itself is mapped at page granularity. Map all other
+	 * memory, making sure we don't overwrite the existing kernel mappings.
+	 */
+
+	/* No overlap with the kernel. */
+	if (end < kernel_start || start >= kernel_end) {
+		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
+				     end - start, PAGE_KERNEL,
+				     early_pgtable_alloc);
+		return;
 	}
 
+	/*
+	 * This block overlaps the kernel mapping. Map the portion(s) which
+	 * don't overlap.
+	 */
+	if (start < kernel_start)
+		__create_pgd_mapping(pgd, start,
+				     __phys_to_virt(start),
+				     kernel_start - start, PAGE_KERNEL,
+				     early_pgtable_alloc);
+	if (kernel_end < end)
+		__create_pgd_mapping(pgd, kernel_end,
+				     __phys_to_virt(kernel_end),
+				     end - kernel_end, PAGE_KERNEL,
+				     early_pgtable_alloc);
 }
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
-	create_mapping(start, __phys_to_virt(start), end - start,
-			PAGE_KERNEL_EXEC);
-}
-#endif
 
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
 {
 	struct memblock_region *reg;
 
@@ -398,33 +392,10 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-		__map_memblock(start, end);
+		__map_memblock(pgd, start, end);
 	}
 }
 
-static void __init fixup_executable(void)
-{
-#ifdef CONFIG_DEBUG_RODATA
-	/* now that we are actually fully mapped, make the start/end more fine grained */
-	if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
-		unsigned long aligned_start = round_down(__pa(_stext),
-							 SWAPPER_BLOCK_SIZE);
-
-		create_mapping(aligned_start, __phys_to_virt(aligned_start),
-				__pa(_stext) - aligned_start,
-				PAGE_KERNEL);
-	}
-
-	if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
-		unsigned long aligned_end = round_up(__pa(__init_end),
-							  SWAPPER_BLOCK_SIZE);
-		create_mapping(__pa(__init_end), (unsigned long)__init_end,
-				aligned_end - __pa(__init_end),
-				PAGE_KERNEL);
-	}
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void)
 {
@@ -442,14 +413,72 @@ void fixup_init(void)
 			PAGE_KERNEL);
 }
 
+static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
+				    pgprot_t prot)
+{
+	phys_addr_t pa_start = __pa(va_start);
+	unsigned long size = va_end - va_start;
+
+	BUG_ON(!PAGE_ALIGNED(pa_start));
+	BUG_ON(!PAGE_ALIGNED(size));
+
+	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+			     early_pgtable_alloc);
+}
+
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
+{
+
+	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);
+	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);
+	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);
+
+	/*
+	 * The fixmap falls in a separate pgd to the kernel, and doesn't live
+	 * in the carveout for the swapper_pg_dir. We can simply re-use the
+	 * existing dir for the fixmap.
+	 */
+	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));
+
+	kasan_copy_shadow(pgd);
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
  */
 void __init paging_init(void)
 {
-	map_mem();
-	fixup_executable();
+	phys_addr_t pgd_phys = early_pgtable_alloc();
+	pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+
+	map_kernel(pgd);
+	map_mem(pgd);
+
+	/*
+	 * We want to reuse the original swapper_pg_dir so we don't have to
+	 * communicate the new address to non-coherent secondaries in
+	 * secondary_entry, and so cpu_switch_mm can generate the address with
+	 * adrp+add rather than a load from some global variable.
+	 *
+	 * To do this we need to go via a temporary pgd.
+	 */
+	cpu_replace_ttbr1(__va(pgd_phys));
+	memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+	cpu_replace_ttbr1(swapper_pg_dir);
+
+	pgd_clear_fixmap();
+	memblock_free(pgd_phys, PAGE_SIZE);
+
+	/*
+	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+	 * allocated with it.
+	 */
+	memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+		      SWAPPER_DIR_SIZE - PAGE_SIZE);
 
 	bootmem_init();
 }

From f00cf2ba83ca2b1ade50b27dd60d6f4294ddeef7 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 26 Jan 2016 11:10:38 +0000
Subject: [PATCH 739/797] arm64: kernel: implement ACPI parking protocol

The SBBR and ACPI specifications allow ACPI based systems that do not
implement PSCI (eg systems with no EL3) to boot through the ACPI parking
protocol specification[1].

This patch implements the ACPI parking protocol CPU operations, and adds
code that eases parsing the parking protocol data structures to the
ARM64 SMP initialization carried out at the same time as cpus enumeration.

To wake-up the CPUs from the parked state, this patch implements a
wakeup IPI for ARM64 (ie arch_send_wakeup_ipi_mask()) that mirrors the
ARM one, so that a specific IPI is sent for wake-up purpose in order
to distinguish it from other IPI sources.

Given the current ACPI MADT parsing API, the patch implements a glue
layer that helps passing MADT GICC data structure from SMP initialization
code to the parking protocol implementation somewhat overriding the CPU
operations interfaces. This is to avoid creating a completely transparent
DT/ACPI CPU operations layer that would require creating opaque
structure handling for CPUs data (DT represents CPU through DT nodes, ACPI
through static MADT table entries), which seems overkill given that ACPI
on ARM64 mandates only two booting protocols (PSCI and parking protocol),
so there is no need for further protocol additions.

Based on the original work by Mark Salter <msalter@redhat.com>

[1] https://acpica.org/sites/acpica/files/MP%20Startup%20for%20ARM%20platforms.docx

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Tested-by: Loc Ho <lho@apm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Al Stone <ahs3@redhat.com>
[catalin.marinas@arm.com: Added WARN_ONCE(!acpi_parking_protocol_valid() on the IPI]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 5e89c55e4ed81d7abb1ce8828db35fa389dc0e90)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                        |   9 ++
 arch/arm64/include/asm/acpi.h             |  19 ++-
 arch/arm64/include/asm/hardirq.h          |   2 +-
 arch/arm64/include/asm/smp.h              |   9 ++
 arch/arm64/kernel/Makefile                |   1 +
 arch/arm64/kernel/acpi_parking_protocol.c | 153 ++++++++++++++++++++++
 arch/arm64/kernel/cpu_ops.c               |  27 +++-
 arch/arm64/kernel/smp.c                   |  28 ++++
 8 files changed, 242 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/kernel/acpi_parking_protocol.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ffa3c549a4ba..4a1b665d90dc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -706,6 +706,15 @@ endmenu
 
 menu "Boot options"
 
+config ARM64_ACPI_PARKING_PROTOCOL
+	bool "Enable support for the ARM64 ACPI parking protocol"
+	depends on ACPI
+	help
+	  Enable support for the ARM64 ACPI parking protocol. If disabled
+	  the kernel will not allow booting through the ARM64 ACPI parking
+	  protocol even if the corresponding data is present in the ACPI
+	  MADT table.
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index caafd63b8092..aee323b13802 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -87,9 +87,26 @@ void __init acpi_init_cpus(void);
 static inline void acpi_init_cpus(void) { }
 #endif /* CONFIG_ACPI */
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+bool acpi_parking_protocol_valid(int cpu);
+void __init
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor);
+#else
+static inline bool acpi_parking_protocol_valid(int cpu) { return false; }
+static inline void
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
+{}
+#endif
+
 static inline const char *acpi_get_enable_method(int cpu)
 {
-	return acpi_psci_present() ? "psci" : NULL;
+	if (acpi_psci_present())
+		return "psci";
+
+	if (acpi_parking_protocol_valid(cpu))
+		return "parking-protocol";
+
+	return NULL;
 }
 
 #ifdef	CONFIG_ACPI_APEI
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index a57601f9d17c..8740297dac77 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
-#define NR_IPI	5
+#define NR_IPI	6
 
 typedef struct {
 	unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d9c3d6a6100a..2013a4dc5124 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -64,6 +64,15 @@ extern void secondary_entry(void);
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+#else
+static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	BUILD_BUG();
+}
+#endif
+
 extern int __cpu_disable(void);
 
 extern void __cpu_die(unsigned int cpu);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f8b13a..c4e2f70c0aa0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
new file mode 100644
index 000000000000..4b1e5a7a98da
--- /dev/null
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -0,0 +1,153 @@
+/*
+ * ARM64 ACPI Parking Protocol implementation
+ *
+ * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *	    Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+#include <asm/cpu_ops.h>
+
+struct cpu_mailbox_entry {
+	phys_addr_t mailbox_addr;
+	u8 version;
+	u8 gic_cpu_id;
+};
+
+static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS];
+
+void __init acpi_set_mailbox_entry(int cpu,
+				   struct acpi_madt_generic_interrupt *p)
+{
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+	cpu_entry->mailbox_addr = p->parked_address;
+	cpu_entry->version = p->parking_version;
+	cpu_entry->gic_cpu_id = p->cpu_interface_number;
+}
+
+bool acpi_parking_protocol_valid(int cpu)
+{
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+	return cpu_entry->mailbox_addr && cpu_entry->version;
+}
+
+static int acpi_parking_protocol_cpu_init(unsigned int cpu)
+{
+	pr_debug("%s: ACPI parked addr=%llx\n", __func__,
+		  cpu_mailbox_entries[cpu].mailbox_addr);
+
+	return 0;
+}
+
+static int acpi_parking_protocol_cpu_prepare(unsigned int cpu)
+{
+	return 0;
+}
+
+struct parking_protocol_mailbox {
+	__le32 cpu_id;
+	__le32 reserved;
+	__le64 entry_point;
+};
+
+static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
+{
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+	struct parking_protocol_mailbox __iomem *mailbox;
+	__le32 cpu_id;
+
+	/*
+	 * Map mailbox memory with attribute device nGnRE (ie ioremap -
+	 * this deviates from the parking protocol specifications since
+	 * the mailboxes are required to be mapped nGnRnE; the attribute
+	 * discrepancy is harmless insofar as the protocol specification
+	 * is concerned).
+	 * If the mailbox is mistakenly allocated in the linear mapping
+	 * by FW ioremap will fail since the mapping will be prevented
+	 * by the kernel (it clashes with the linear mapping attributes
+	 * specifications).
+	 */
+	mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+	if (!mailbox)
+		return -EIO;
+
+	cpu_id = readl_relaxed(&mailbox->cpu_id);
+	/*
+	 * Check if firmware has set-up the mailbox entry properly
+	 * before kickstarting the respective cpu.
+	 */
+	if (cpu_id != ~0U) {
+		iounmap(mailbox);
+		return -ENXIO;
+	}
+
+	/*
+	 * We write the entry point and cpu id as LE regardless of the
+	 * native endianness of the kernel. Therefore, any boot-loaders
+	 * that read this address need to convert this address to the
+	 * Boot-Loader's endianness before jumping.
+	 */
+	writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+	writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
+
+	arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+
+	iounmap(mailbox);
+
+	return 0;
+}
+
+static void acpi_parking_protocol_cpu_postboot(void)
+{
+	int cpu = smp_processor_id();
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+	struct parking_protocol_mailbox __iomem *mailbox;
+	__le64 entry_point;
+
+	/*
+	 * Map mailbox memory with attribute device nGnRE (ie ioremap -
+	 * this deviates from the parking protocol specifications since
+	 * the mailboxes are required to be mapped nGnRnE; the attribute
+	 * discrepancy is harmless insofar as the protocol specification
+	 * is concerned).
+	 * If the mailbox is mistakenly allocated in the linear mapping
+	 * by FW ioremap will fail since the mapping will be prevented
+	 * by the kernel (it clashes with the linear mapping attributes
+	 * specifications).
+	 */
+	mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+	if (!mailbox)
+		return;
+
+	entry_point = readl_relaxed(&mailbox->entry_point);
+	/*
+	 * Check if firmware has cleared the entry_point as expected
+	 * by the protocol specification.
+	 */
+	WARN_ON(entry_point);
+
+	iounmap(mailbox);
+}
+
+const struct cpu_operations acpi_parking_protocol_ops = {
+	.name		= "parking-protocol",
+	.cpu_init	= acpi_parking_protocol_cpu_init,
+	.cpu_prepare	= acpi_parking_protocol_cpu_prepare,
+	.cpu_boot	= acpi_parking_protocol_cpu_boot,
+	.cpu_postboot	= acpi_parking_protocol_cpu_postboot
+};
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index b6bd7d447768..c7cfb8fe06f9 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -25,19 +25,30 @@
 #include <asm/smp_plat.h>
 
 extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations acpi_parking_protocol_ops;
 extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS];
 
-static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
 	&cpu_psci_ops,
 	NULL,
 };
 
+static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+	&acpi_parking_protocol_ops,
+#endif
+	&cpu_psci_ops,
+	NULL,
+};
+
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-	const struct cpu_operations **ops = supported_cpu_ops;
+	const struct cpu_operations **ops;
+
+	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 
 	while (*ops) {
 		if (!strcmp(name, (*ops)->name))
@@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu)
 		}
 	} else {
 		enable_method = acpi_get_enable_method(cpu);
-		if (!enable_method)
-			pr_err("Unsupported ACPI enable-method\n");
+		if (!enable_method) {
+			/*
+			 * In ACPI systems the boot CPU does not require
+			 * checking the enable method since for some
+			 * boot protocol (ie parking protocol) it need not
+			 * be initialized. Don't warn spuriously.
+			 */
+			if (cpu != 0)
+				pr_err("Unsupported ACPI enable-method\n");
+		}
 	}
 
 	return enable_method;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 68e7f79630d4..24cb4f800033 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -70,6 +70,7 @@ enum ipi_msg_type {
 	IPI_CPU_STOP,
 	IPI_TIMER,
 	IPI_IRQ_WORK,
+	IPI_WAKEUP
 };
 
 /*
@@ -443,6 +444,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	/* map the logical cpu id to cpu MPIDR */
 	cpu_logical_map(cpu_count) = hwid;
 
+	/*
+	 * Set-up the ACPI parking protocol cpu entries
+	 * while initializing the cpu_logical_map to
+	 * avoid parsing MADT entries multiple times for
+	 * nothing (ie a valid cpu_logical_map entry should
+	 * contain a valid parking protocol data set to
+	 * initialize the cpu if the parking protocol is
+	 * the only available enable method).
+	 */
+	acpi_set_mailbox_entry(cpu_count, processor);
+
 	cpu_count++;
 }
 
@@ -625,6 +637,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
 	S(IPI_TIMER, "Timer broadcast interrupts"),
 	S(IPI_IRQ_WORK, "IRQ work interrupts"),
+	S(IPI_WAKEUP, "CPU wake-up interrupts"),
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
@@ -668,6 +681,13 @@ void arch_send_call_function_single_ipi(int cpu)
 	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_WAKEUP);
+}
+#endif
+
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
@@ -745,6 +765,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		break;
 #endif
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+	case IPI_WAKEUP:
+		WARN_ONCE(!acpi_parking_protocol_valid(cpu),
+			  "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
+			  cpu);
+		break;
+#endif
+
 	default:
 		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
 		break;

From a0e40450cf255994501d3f84081f95b1fb41623d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 27 Jan 2016 10:50:19 +0100
Subject: [PATCH 740/797] arm64: allow vmalloc regions to be set with
 set_memory_*

The range of set_memory_* is currently restricted to the module address
range because of difficulties in breaking down larger block sizes.
vmalloc maps PAGE_SIZE pages so it is safe to use as well. Update the
function ranges and add a comment explaining why the range is restricted
the way it is.

Suggested-by: Laura Abbott <labbott@fedoraproject.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 95f5c80050ad723163aa80dc8bffd48ef4afc6d5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/pageattr.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index cf6240741134..0795c3a36d8f 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	unsigned long end = start + size;
 	int ret;
 	struct page_change_data data;
+	struct vm_struct *area;
 
 	if (!PAGE_ALIGNED(addr)) {
 		start &= PAGE_MASK;
@@ -51,10 +53,23 @@ static int change_memory_common(unsigned long addr, int numpages,
 		WARN_ON_ONCE(1);
 	}
 
-	if (start < MODULES_VADDR || start >= MODULES_END)
-		return -EINVAL;
-
-	if (end < MODULES_VADDR || end >= MODULES_END)
+	/*
+	 * Kernel VA mappings are always live, and splitting live section
+	 * mappings into page mappings may cause TLB conflicts. This means
+	 * we have to ensure that changing the permission bits of the range
+	 * we are operating on does not result in such splitting.
+	 *
+	 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
+	 * Those are guaranteed to consist entirely of page mappings, and
+	 * splitting is never needed.
+	 *
+	 * So check whether the [addr, addr + size) interval is entirely
+	 * covered by precisely one VM area that has the VM_ALLOC flag set.
+	 */
+	area = find_vm_area((void *)addr);
+	if (!area ||
+	    end > (unsigned long)area->addr + area->size ||
+	    !(area->flags & VM_ALLOC))
 		return -EINVAL;
 
 	if (!numpages)

From 41cb2829d020e4fdaeb5eb9286153f4c7dc8e7dd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Feb 2016 12:46:23 +0000
Subject: [PATCH 741/797] arm64: prefetch: don't provide spin_lock_prefetch
 with LSE

The LSE atomics rely on us not dirtying data at L1 if we can avoid it,
otherwise many of the potential scalability benefits are lost.

This patch replaces spin_lock_prefetch with a nop when the LSE atomics
are in use, so that users don't shoot themselves in the foot by causing
needless coherence traffic at L1.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit cd5e10bdf3795d22f10787bb1991c43798c885d5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/processor.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 4acb7ca94fcd..31b76fce4477 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -29,6 +29,7 @@
 
 #include <linux/string.h>
 
+#include <asm/alternative.h>
 #include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/pgtable-hwdef.h>
@@ -177,9 +178,11 @@ static inline void prefetchw(const void *ptr)
 }
 
 #define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *x)
+static inline void spin_lock_prefetch(const void *ptr)
 {
-	prefetchw(x);
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+		     "prfm pstl1strm, %a0",
+		     "nop") : : "p" (ptr));
 }
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT

From 742e490adaa444e9657528ef38dde35f2916c793 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Feb 2016 12:46:24 +0000
Subject: [PATCH 742/797] arm64: prefetch: add alternative pattern for CPUs
 without a prefetcher

Most CPUs have a hardware prefetcher which generally performs better
without explicit prefetch instructions issued by software; however,
some CPUs (e.g. Cavium ThunderX) rely solely on explicit prefetch
instructions.

This patch adds an alternative pattern (ARM64_HAS_NO_HW_PREFETCH) to
allow our library code to make use of explicit prefetch instructions
during things like copy routines only when the CPU does not have the
capability to perform the prefetching itself.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit d5370f754875460662abe8561388e019d90dd0c4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpufeature.h |  3 ++-
 arch/arm64/include/asm/cputype.h    | 17 ++++++++++++++++-
 arch/arm64/kernel/cpu_errata.c      | 18 +++---------------
 arch/arm64/kernel/cpufeature.c      | 17 +++++++++++++++++
 4 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8f271b83f910..8d56bd8550dc 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -30,8 +30,9 @@
 #define ARM64_HAS_LSE_ATOMICS			5
 #define ARM64_WORKAROUND_CAVIUM_23154		6
 #define ARM64_WORKAROUND_834220			7
+#define ARM64_HAS_NO_HW_PREFETCH		8
 
-#define ARM64_NCAPS				8
+#define ARM64_NCAPS				9
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 1a5949364ed0..7540284a17fe 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -57,11 +57,22 @@
 #define MIDR_IMPLEMENTOR(midr)	\
 	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
 
-#define MIDR_CPU_PART(imp, partnum) \
+#define MIDR_CPU_MODEL(imp, partnum) \
 	(((imp)			<< MIDR_IMPLEMENTOR_SHIFT) | \
 	(0xf			<< MIDR_ARCHITECTURE_SHIFT) | \
 	((partnum)		<< MIDR_PARTNUM_SHIFT))
 
+#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+			     MIDR_ARCHITECTURE_MASK)
+
+#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max)		\
+({									\
+	u32 _model = (midr) & MIDR_CPU_MODEL_MASK;			\
+	u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK);	\
+									\
+	_model == (model) && rv >= (rv_min) && rv <= (rv_max);		\
+ })
+
 #define ARM_CPU_IMP_ARM			0x41
 #define ARM_CPU_IMP_APM			0x50
 #define ARM_CPU_IMP_CAVIUM		0x43
@@ -75,6 +86,10 @@
 
 #define CAVIUM_CPU_PART_THUNDERX	0x0A1
 
+#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index feb6b4efa641..e6bc988e8dbf 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -21,24 +21,12 @@
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
 
-#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
-#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-#define MIDR_THUNDERX	MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
-
-#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
-			MIDR_ARCHITECTURE_MASK)
-
 static bool __maybe_unused
 is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
 {
-	u32 midr = read_cpuid_id();
-
-	if ((midr & CPU_MODEL_MASK) != entry->midr_model)
-		return false;
-
-	midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
-
-	return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+	return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
+				       entry->midr_range_min,
+				       entry->midr_range_max);
 }
 
 #define MIDR_RANGE(model, min, max) \
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5c90aa490a2b..3615d7d7c9af 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -621,6 +621,18 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
 	return has_sre;
 }
 
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
+{
+	u32 midr = read_cpuid_id();
+	u32 rv_min, rv_max;
+
+	/* Cavium ThunderX pass 1.x and 2.x */
+	rv_min = 0;
+	rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -651,6 +663,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 2,
 	},
 #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+	{
+		.desc = "Software prefetching using PRFM",
+		.capability = ARM64_HAS_NO_HW_PREFETCH,
+		.matches = has_no_hw_prefetch,
+	},
 	{},
 };
 

From 93c384820cf3c1db51073e746980866d2bce8af9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Feb 2016 12:46:25 +0000
Subject: [PATCH 743/797] arm64: lib: improve copy_page to deal with 128 bytes
 at a time

We want to avoid lots of different copy_page implementations, settling
for something that is "good enough" everywhere and hopefully easy to
understand and maintain whilst we're at it.

This patch reworks our copy_page implementation based on discussions
with Cavium on the list and benchmarking on Cortex-A processors so that:

  - The loop is unrolled to copy 128 bytes per iteration

  - The reads are offset so that we read from the next 128-byte block
    in the same iteration that we store the previous block

  - Explicit prefetch instructions are removed for now, since they hurt
    performance on CPUs with hardware prefetching

  - The loop exit condition is calculated at the start of the loop

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 223e23e8aa26b0bb62c597637e77295e14f6a62c)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/lib/copy_page.S | 46 +++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 512b9a7b980e..2534533ceb1d 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -27,20 +27,50 @@
  *	x1 - src
  */
 ENTRY(copy_page)
-	/* Assume cache line size is 64 bytes. */
-	prfm	pldl1strm, [x1, #64]
-1:	ldp	x2, x3, [x1]
+	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
 	ldp	x8, x9, [x1, #48]
-	add	x1, x1, #64
-	prfm	pldl1strm, [x1, #64]
+	ldp	x10, x11, [x1, #64]
+	ldp	x12, x13, [x1, #80]
+	ldp	x14, x15, [x1, #96]
+	ldp	x16, x17, [x1, #112]
+
+	mov	x18, #(PAGE_SIZE - 128)
+	add	x1, x1, #128
+1:
+	subs	x18, x18, #128
+
+	stnp	x2, x3, [x0]
+	ldp	x2, x3, [x1]
+	stnp	x4, x5, [x0, #16]
+	ldp	x4, x5, [x1, #16]
+	stnp	x6, x7, [x0, #32]
+	ldp	x6, x7, [x1, #32]
+	stnp	x8, x9, [x0, #48]
+	ldp	x8, x9, [x1, #48]
+	stnp	x10, x11, [x0, #64]
+	ldp	x10, x11, [x1, #64]
+	stnp	x12, x13, [x0, #80]
+	ldp	x12, x13, [x1, #80]
+	stnp	x14, x15, [x0, #96]
+	ldp	x14, x15, [x1, #96]
+	stnp	x16, x17, [x0, #112]
+	ldp	x16, x17, [x1, #112]
+
+	add	x0, x0, #128
+	add	x1, x1, #128
+
+	b.gt	1b
+
 	stnp	x2, x3, [x0]
 	stnp	x4, x5, [x0, #16]
 	stnp	x6, x7, [x0, #32]
 	stnp	x8, x9, [x0, #48]
-	add	x0, x0, #64
-	tst	x1, #(PAGE_SIZE - 1)
-	b.ne	1b
+	stnp	x10, x11, [x0, #64]
+	stnp	x12, x13, [x0, #80]
+	stnp	x14, x15, [x0, #96]
+	stnp	x16, x17, [x0, #112]
+
 	ret
 ENDPROC(copy_page)

From e46018fe4fd741b5f58b6c3cf8b94ed34603c325 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@cavium.com>
Date: Tue, 2 Feb 2016 12:46:26 +0000
Subject: [PATCH 744/797] arm64: lib: patch in prfm for copy_page if requested

On ThunderX T88 pass 1 and pass 2, there is no hardware prefetching, so
we need to patch in explicit software prefetching instructions.

Prefetching improves this code by 60% over the original code and 2x
over the code without prefetching for the affected hardware using the
benchmark code at https://github.com/apinski-cavium/copy_page_benchmark

Signed-off-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 60e0a09db24adc8809696307e5d97cc4ba7cb3e0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/lib/copy_page.S | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 2534533ceb1d..4c1e700840b6 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,8 @@
 #include <linux/const.h>
 #include <asm/assembler.h>
 #include <asm/page.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
 
 /*
  * Copy a page from src to dest (both are page aligned)
@@ -27,6 +29,15 @@
  *	x1 - src
  */
 ENTRY(copy_page)
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+	nop
+	nop
+alternative_else
+	# Prefetch two cache lines ahead.
+	prfm    pldl1strm, [x1, #128]
+	prfm    pldl1strm, [x1, #256]
+alternative_endif
+
 	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
@@ -41,6 +52,12 @@ ENTRY(copy_page)
 1:
 	subs	x18, x18, #128
 
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+	nop
+alternative_else
+	prfm    pldl1strm, [x1, #384]
+alternative_endif
+
 	stnp	x2, x3, [x0]
 	ldp	x2, x3, [x1]
 	stnp	x4, x5, [x0, #16]

From a97b93b11bd9f76b0cba55b8ff04a7489b087caf Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 10 Feb 2016 10:07:30 +0000
Subject: [PATCH 745/797] arm64: prefetch: add missing #include for
 spin_lock_prefetch

As of 52e662326e1e ("arm64: prefetch: don't provide spin_lock_prefetch
with LSE"), spin_lock_prefetch is patched at runtime when the LSE atomics
are in use. This relies on the ARM64_LSE_ATOMIC_INSN macro to drive
the alternatives framework, but that macro is only available via
asm/lse.h, which isn't explicitly included in processor.h. Consequently,
drivers can run into build failures such as:

   In file included from include/linux/prefetch.h:14:0,
                    from drivers/net/ethernet/intel/i40e/i40e_txrx.c:27:
   arch/arm64/include/asm/processor.h: In function 'spin_lock_prefetch':
   arch/arm64/include/asm/processor.h:183:15: error: expected string literal before 'ARM64_LSE_ATOMIC_INSN'
     asm volatile(ARM64_LSE_ATOMIC_INSN(

This patch adds the missing include and gets things building again.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit afb83cc3f0e4f86ea0e1cc3db7a90f58f1abd4d5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/processor.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 31b76fce4477..5bb1d763d17a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -32,6 +32,7 @@
 #include <asm/alternative.h>
 #include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>

From 65e670213029f7df5dd20407eecbe691aa078930 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Fri, 5 Feb 2016 16:24:46 -0800
Subject: [PATCH 746/797] arm64: Drop alloc function from create_mapping

create_mapping is only used in fixmap_remap_fdt. All the create_mapping
calls need to happen on existing translation table pages without
additional allocations. Rather than have an alloc function be called
and fail, just set it to NULL and catch its use. Also change
the name to create_mapping_noalloc to better capture what exactly is
going on.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 132233a759580f5ce9b1bfaac9073e47d03c460d)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4874d2fea1c9..3096240e6eb8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -116,7 +116,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		phys_addr_t pte_phys = pgtable_alloc();
+		phys_addr_t pte_phys;
+		BUG_ON(!pgtable_alloc);
+		pte_phys = pgtable_alloc();
 		pte = pte_set_fixmap(pte_phys);
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
@@ -158,7 +160,9 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		phys_addr_t pmd_phys = pgtable_alloc();
+		phys_addr_t pmd_phys;
+		BUG_ON(!pgtable_alloc);
+		pmd_phys = pgtable_alloc();
 		pmd = pmd_set_fixmap(pmd_phys);
 		if (pud_sect(*pud)) {
 			/*
@@ -223,7 +227,9 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		phys_addr_t pud_phys = pgtable_alloc();
+		phys_addr_t pud_phys;
+		BUG_ON(!pgtable_alloc);
+		pud_phys = pgtable_alloc();
 		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
 	}
 	BUG_ON(pgd_bad(*pgd));
@@ -312,7 +318,12 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
 }
 
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+/*
+ * This function can only be used to modify existing table entries,
+ * without allocating new levels of table. Note that this permits the
+ * creation of new section or page entries.
+ */
+static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 				  phys_addr_t size, pgprot_t prot)
 {
 	if (virt < VMALLOC_START) {
@@ -321,7 +332,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     early_pgtable_alloc);
+			     NULL);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
@@ -678,7 +689,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	/*
 	 * Make sure that the FDT region can be mapped without the need to
 	 * allocate additional translation table pages, so that it is safe
-	 * to call create_mapping() this early.
+	 * to call create_mapping_noalloc() this early.
 	 *
 	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
 	 * be in the same PMD as the rest of the fixmap.
@@ -694,8 +705,8 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	dt_virt = (void *)dt_virt_base + offset;
 
 	/* map the first chunk so we can read the size from the header */
-	create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-		       SWAPPER_BLOCK_SIZE, prot);
+	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+			dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
 
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
@@ -705,7 +716,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 		return NULL;
 
 	if (offset + size > SWAPPER_BLOCK_SIZE)
-		create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
 			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
 
 	memblock_reserve(dt_phys, size);

From e4d0298cdff23a21e532291df7a4fb6e0a908be4 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Fri, 5 Feb 2016 16:24:47 -0800
Subject: [PATCH 747/797] arm64: Add support for ARCH_SUPPORTS_DEBUG_PAGEALLOC

ARCH_SUPPORTS_DEBUG_PAGEALLOC provides a hook to map and unmap
pages for debugging purposes. This requires memory be mapped
with PAGE_SIZE mappings since breaking down larger mappings
at runtime will lead to TLB conflicts. Check if debug_pagealloc
is enabled at runtime and if so, map everything with PAGE_SIZE
pages. Implement the functions to actually map/unmap the
pages at runtime.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
[catalin.marinas@arm.com: static annotation block_mappings_allowed() and #ifdef]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 83863f25e4b8214e994ef8b5647aad614d74b45d)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig       |  3 +++
 arch/arm64/mm/mmu.c      | 26 +++++++++++++++++++++--
 arch/arm64/mm/pageattr.c | 46 +++++++++++++++++++++++++++++++---------
 3 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4a1b665d90dc..98992dee9a29 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -507,6 +507,9 @@ config HOTPLUG_CPU
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz
 
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y
+
 config ARCH_HAS_HOLES_MEMORYMODEL
 	def_bool y if SPARSEMEM
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3096240e6eb8..d1fa678355c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -149,6 +149,26 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 	} while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+
+	/*
+	 * If debug_page_alloc is enabled we must map the linear map
+	 * using pages. However, other mappings created by
+	 * create_mapping_noalloc must use sections in some cases. Allow
+	 * sections to be used in those cases, where no pgtable_alloc
+	 * function is provided.
+	 */
+	return !pgtable_alloc || !debug_pagealloc_enabled();
+}
+#else
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+	return true;
+}
+#endif
+
 static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void))
@@ -181,7 +201,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	do {
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
-		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+		      block_mappings_allowed(pgtable_alloc)) {
 			pmd_t old_pmd =*pmd;
 			set_pmd(pmd, __pmd(phys |
 					   pgprot_val(mk_sect_prot(prot))));
@@ -241,7 +262,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys)) {
+		if (use_1G_block(addr, next, phys) &&
+		    block_mappings_allowed(pgtable_alloc)) {
 			pud_t old_pud = *pud;
 			set_pud(pud, __pud(phys |
 					   pgprot_val(mk_sect_prot(prot))));
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0795c3a36d8f..ca6d268e3313 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
 	return 0;
 }
 
+/*
+ * This function assumes that the range is mapped with PAGE_SIZE pages.
+ */
+static int __change_memory_common(unsigned long start, unsigned long size,
+				pgprot_t set_mask, pgprot_t clear_mask)
+{
+	struct page_change_data data;
+	int ret;
+
+	data.set_mask = set_mask;
+	data.clear_mask = clear_mask;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+					&data);
+
+	flush_tlb_kernel_range(start, start + size);
+	return ret;
+}
+
 static int change_memory_common(unsigned long addr, int numpages,
 				pgprot_t set_mask, pgprot_t clear_mask)
 {
 	unsigned long start = addr;
 	unsigned long size = PAGE_SIZE*numpages;
 	unsigned long end = start + size;
-	int ret;
-	struct page_change_data data;
 	struct vm_struct *area;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	if (!numpages)
 		return 0;
 
-	data.set_mask = set_mask;
-	data.clear_mask = clear_mask;
-
-	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
-					&data);
-
-	flush_tlb_kernel_range(start, end);
-	return ret;
+	return __change_memory_common(start, size, set_mask, clear_mask);
 }
 
 int set_memory_ro(unsigned long addr, int numpages)
@@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages)
 					__pgprot(PTE_PXN));
 }
 EXPORT_SYMBOL_GPL(set_memory_x);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long addr = (unsigned long) page_address(page);
+
+	if (enable)
+		__change_memory_common(addr, PAGE_SIZE * numpages,
+					__pgprot(PTE_VALID),
+					__pgprot(0));
+	else
+		__change_memory_common(addr, PAGE_SIZE * numpages,
+					__pgprot(0),
+					__pgprot(PTE_VALID));
+}
+#endif

From 5ca7d16080e32bb34f8d2ead86adde49d1c4c652 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Fri, 5 Feb 2016 16:24:48 -0800
Subject: [PATCH 748/797] arm64: ptdump: Indicate whether memory should be
 faulting

With CONFIG_DEBUG_PAGEALLOC, pages do not have the valid bit
set when free in the buddy allocator. Add an indication to
the page table dumping code that the valid bit is not set,
'F' for fault, to make this easier to understand.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit d7e9d59494a9a5d83274f5af2148b82ca22dff3f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/dump.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0adbebbc2803..0841b2bf0e6a 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -90,6 +90,11 @@ struct prot_bits {
 
 static const struct prot_bits pte_bits[] = {
 	{
+		.mask	= PTE_VALID,
+		.val	= PTE_VALID,
+		.set	= " ",
+		.clear	= "F",
+	}, {
 		.mask	= PTE_USER,
 		.val	= PTE_USER,
 		.set	= "USR",

From 1d1e6a82d643537e9b80cf58f887b2be616aa515 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Fri, 5 Feb 2016 15:50:18 -0800
Subject: [PATCH 749/797] arm64: ubsan: select ARCH_HAS_UBSAN_SANITIZE_ALL

To enable UBSAN on arm64, ARCH_HAS_UBSAN_SANITIZE_ALL needs to be selected.

Basic kernel bootup test is passed on arm64 with CONFIG_UBSAN_SANITIZE_ALL
enabled.

Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f0b7f8a4b44657386273a67179dd901c81cd11a6)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 98992dee9a29..1420102341d0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,6 +13,7 @@ config ARM64
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC

From 391a428880ad64f0ab9dcf48f4666ba2a6aa7e76 Mon Sep 17 00:00:00 2001
From: David Brown <david.brown@linaro.org>
Date: Wed, 10 Feb 2016 13:52:22 -0800
Subject: [PATCH 750/797] arm64: vdso: Mark vDSO code as read-only

Although the arm64 vDSO is cleanly separated by code/data with the
code being read-only in userspace mappings, the code page is still
writable from the kernel.  There have been exploits (such as
http://itszn.com/blog/?p=21) that take advantage of this on x86 to go
from a bad kernel write to full root.

Prevent this specific exploit on arm64 by putting the vDSO code page
in read-only memory as well.

Before the change:
[    3.138366] vdso: 2 pages (1 code @ ffffffc000a71000, 1 data @ ffffffc000a70000)
---[ Kernel Mapping ]---
0xffffffc000000000-0xffffffc000082000         520K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000082000-0xffffffc000200000        1528K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc000200000-0xffffffc000800000           6M     ro x  SHD AF        BLK UXN MEM/NORMAL
0xffffffc000800000-0xffffffc0009b6000        1752K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc0009b6000-0xffffffc000c00000        2344K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000c00000-0xffffffc008000000         116M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc00c000000-0xffffffc07f000000        1840M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc800000000-0xffffffc840000000           1G     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc840000000-0xffffffc87ae00000         942M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87ae00000-0xffffffc87ae70000         448K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af80000-0xffffffc87af8a000          40K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af8b000-0xffffffc87b000000         468K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87b000000-0xffffffc87fe00000          78M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87fe00000-0xffffffc87ff50000        1344K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87ff90000-0xffffffc87ffa0000          64K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87fff0000-0xffffffc880000000          64K     RW NX SHD AF            UXN MEM/NORMAL

After:
[    3.138368] vdso: 2 pages (1 code @ ffffffc0006de000, 1 data @ ffffffc000a74000)
---[ Kernel Mapping ]---
0xffffffc000000000-0xffffffc000082000         520K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000082000-0xffffffc000200000        1528K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc000200000-0xffffffc000800000           6M     ro x  SHD AF        BLK UXN MEM/NORMAL
0xffffffc000800000-0xffffffc0009b8000        1760K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc0009b8000-0xffffffc000c00000        2336K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000c00000-0xffffffc008000000         116M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc00c000000-0xffffffc07f000000        1840M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc800000000-0xffffffc840000000           1G     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc840000000-0xffffffc87ae00000         942M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87ae00000-0xffffffc87ae70000         448K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af80000-0xffffffc87af8a000          40K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af8b000-0xffffffc87b000000         468K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87b000000-0xffffffc87fe00000          78M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87fe00000-0xffffffc87ff50000        1344K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87ff90000-0xffffffc87ffa0000          64K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87fff0000-0xffffffc880000000          64K     RW NX SHD AF            UXN MEM/NORMAL

Inspired by https://lkml.org/lkml/2016/1/19/494 based on work by the
PaX Team, Brad Spengler, and Kees Cook.

Signed-off-by: David Brown <david.brown@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[catalin.marinas@arm.com: removed superfluous __PAGE_ALIGNED_DATA]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 88d8a7994e564d209d4b2583496631c2357d386b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/vdso/vdso.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index 60c1db54b41a..82379a70ef03 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -21,9 +21,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-	__PAGE_ALIGNED_DATA
-
 	.globl vdso_start, vdso_end
+	.section .rodata
 	.balign PAGE_SIZE
 vdso_start:
 	.incbin "arch/arm64/kernel/vdso/vdso.so"

From 866817f9f155879d1d28f0944958a542bb70475c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 15 Feb 2016 09:51:49 +0100
Subject: [PATCH 751/797] arm64: use local label prefixes for __reg_num symbols

The __reg_num_xNN symbols that are used to implement the msr_s and
mrs_s macros are recorded in the ELF metadata of each object file.
This does not affect the size of the final binary, but it does clutter
the output of tools like readelf, i.e.,

  $ readelf -a vmlinux |grep -c __reg_num_x
  50976

So let's use symbols with the .L prefix, these are strictly local,
and don't end up in the object files.

  $ readelf -a vmlinux |grep -c __reg_num_x
  0

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 7abc7d833c9eb16efc8a59239d3771a6e30be367)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/sysreg.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d48ab5b41f52..76907c94b11f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -194,32 +194,32 @@
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
-	.equ	__reg_num_x\num, \num
+	.equ	.L__reg_num_x\num, \num
 	.endr
-	.equ	__reg_num_xzr, 31
+	.equ	.L__reg_num_xzr, 31
 
 	.macro	mrs_s, rt, sreg
-	.inst	0xd5200000|(\sreg)|(__reg_num_\rt)
+	.inst	0xd5200000|(\sreg)|(.L__reg_num_\rt)
 	.endm
 
 	.macro	msr_s, sreg, rt
-	.inst	0xd5000000|(\sreg)|(__reg_num_\rt)
+	.inst	0xd5000000|(\sreg)|(.L__reg_num_\rt)
 	.endm
 
 #else
 
 asm(
 "	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-"	.equ	__reg_num_x\\num, \\num\n"
+"	.equ	.L__reg_num_x\\num, \\num\n"
 "	.endr\n"
-"	.equ	__reg_num_xzr, 31\n"
+"	.equ	.L__reg_num_xzr, 31\n"
 "\n"
 "	.macro	mrs_s, rt, sreg\n"
-"	.inst	0xd5200000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.inst	0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
 "	.endm\n"
 "\n"
 "	.macro	msr_s, sreg, rt\n"
-"	.inst	0xd5000000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.inst	0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
 "	.endm\n"
 );
 

From 0b3419007e096a706ac4894a2cf28cd97b6028e1 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:46 +0000
Subject: [PATCH 752/797] arm64: cpufeature: Change read_cpuid() to use
 sysreg's mrs_s macro

Older assemblers may not have support for newer feature registers. To get
round this, sysreg.h provides a 'mrs_s' macro that takes a register
encoding and generates the raw instruction.

Change read_cpuid() to use mrs_s in all cases so that new registers
don't have to be a special case. Including sysreg.h means we need to move
the include and definition of read_cpuid() after the #ifndef __ASSEMBLY__
to avoid syntax errors in vmlinux.lds.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 0f54b14e76f5302afe164dc911b049b5df836ff5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpufeature.h |  2 +-
 arch/arm64/include/asm/cputype.h    | 20 ++++++-----
 arch/arm64/kernel/cpufeature.c      | 54 ++++++++++++++---------------
 arch/arm64/kernel/cpuinfo.c         | 50 +++++++++++++-------------
 arch/arm64/mm/context.c             |  2 +-
 5 files changed, 65 insertions(+), 63 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8d56bd8550dc..8131abfabb0a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -177,7 +177,7 @@ u64 read_system_reg(u32 id);
 
 static inline bool cpu_supports_mixed_endian_el0(void)
 {
-	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(SYS_ID_AA64MMFR0_EL1));
 }
 
 static inline bool system_supports_mixed_endian_el0(void)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7540284a17fe..b3a83da152a7 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -32,12 +32,6 @@
 #define MPIDR_AFFINITY_LEVEL(mpidr, level) \
 	((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
 
-#define read_cpuid(reg) ({						\
-	u64 __val;							\
-	asm("mrs	%0, " #reg : "=r" (__val));			\
-	__val;								\
-})
-
 #define MIDR_REVISION_MASK	0xf
 #define MIDR_REVISION(midr)	((midr) & MIDR_REVISION_MASK)
 #define MIDR_PARTNUM_SHIFT	4
@@ -92,6 +86,14 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/sysreg.h>
+
+#define read_cpuid(reg) ({						\
+	u64 __val;							\
+	asm("mrs_s	%0, " __stringify(reg) : "=r" (__val));		\
+	__val;								\
+})
+
 /*
  * The CPU ID never changes at run time, so we might as well tell the
  * compiler that it's constant.  Use this function to read the CPU ID
@@ -99,12 +101,12 @@
  */
 static inline u32 __attribute_const__ read_cpuid_id(void)
 {
-	return read_cpuid(MIDR_EL1);
+	return read_cpuid(SYS_MIDR_EL1);
 }
 
 static inline u64 __attribute_const__ read_cpuid_mpidr(void)
 {
-	return read_cpuid(MPIDR_EL1);
+	return read_cpuid(SYS_MPIDR_EL1);
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
@@ -119,7 +121,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
 
 static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
-	return read_cpuid(CTR_EL0);
+	return read_cpuid(SYS_CTR_EL0);
 }
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3615d7d7c9af..1ef10e784031 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -808,35 +808,35 @@ static inline void set_sys_caps_initialised(void)
 static u64 __raw_read_system_reg(u32 sys_id)
 {
 	switch (sys_id) {
-	case SYS_ID_PFR0_EL1:		return (u64)read_cpuid(ID_PFR0_EL1);
-	case SYS_ID_PFR1_EL1:		return (u64)read_cpuid(ID_PFR1_EL1);
-	case SYS_ID_DFR0_EL1:		return (u64)read_cpuid(ID_DFR0_EL1);
-	case SYS_ID_MMFR0_EL1:		return (u64)read_cpuid(ID_MMFR0_EL1);
-	case SYS_ID_MMFR1_EL1:		return (u64)read_cpuid(ID_MMFR1_EL1);
-	case SYS_ID_MMFR2_EL1:		return (u64)read_cpuid(ID_MMFR2_EL1);
-	case SYS_ID_MMFR3_EL1:		return (u64)read_cpuid(ID_MMFR3_EL1);
-	case SYS_ID_ISAR0_EL1:		return (u64)read_cpuid(ID_ISAR0_EL1);
-	case SYS_ID_ISAR1_EL1:		return (u64)read_cpuid(ID_ISAR1_EL1);
-	case SYS_ID_ISAR2_EL1:		return (u64)read_cpuid(ID_ISAR2_EL1);
-	case SYS_ID_ISAR3_EL1:		return (u64)read_cpuid(ID_ISAR3_EL1);
-	case SYS_ID_ISAR4_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
-	case SYS_ID_ISAR5_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
-	case SYS_MVFR0_EL1:		return (u64)read_cpuid(MVFR0_EL1);
-	case SYS_MVFR1_EL1:		return (u64)read_cpuid(MVFR1_EL1);
-	case SYS_MVFR2_EL1:		return (u64)read_cpuid(MVFR2_EL1);
+	case SYS_ID_PFR0_EL1:		return read_cpuid(SYS_ID_PFR0_EL1);
+	case SYS_ID_PFR1_EL1:		return read_cpuid(SYS_ID_PFR1_EL1);
+	case SYS_ID_DFR0_EL1:		return read_cpuid(SYS_ID_DFR0_EL1);
+	case SYS_ID_MMFR0_EL1:		return read_cpuid(SYS_ID_MMFR0_EL1);
+	case SYS_ID_MMFR1_EL1:		return read_cpuid(SYS_ID_MMFR1_EL1);
+	case SYS_ID_MMFR2_EL1:		return read_cpuid(SYS_ID_MMFR2_EL1);
+	case SYS_ID_MMFR3_EL1:		return read_cpuid(SYS_ID_MMFR3_EL1);
+	case SYS_ID_ISAR0_EL1:		return read_cpuid(SYS_ID_ISAR0_EL1);
+	case SYS_ID_ISAR1_EL1:		return read_cpuid(SYS_ID_ISAR1_EL1);
+	case SYS_ID_ISAR2_EL1:		return read_cpuid(SYS_ID_ISAR2_EL1);
+	case SYS_ID_ISAR3_EL1:		return read_cpuid(SYS_ID_ISAR3_EL1);
+	case SYS_ID_ISAR4_EL1:		return read_cpuid(SYS_ID_ISAR4_EL1);
+	case SYS_ID_ISAR5_EL1:		return read_cpuid(SYS_ID_ISAR4_EL1);
+	case SYS_MVFR0_EL1:		return read_cpuid(SYS_MVFR0_EL1);
+	case SYS_MVFR1_EL1:		return read_cpuid(SYS_MVFR1_EL1);
+	case SYS_MVFR2_EL1:		return read_cpuid(SYS_MVFR2_EL1);
 
-	case SYS_ID_AA64PFR0_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
-	case SYS_ID_AA64PFR1_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
-	case SYS_ID_AA64DFR0_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
-	case SYS_ID_AA64DFR1_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
-	case SYS_ID_AA64MMFR0_EL1:	return (u64)read_cpuid(ID_AA64MMFR0_EL1);
-	case SYS_ID_AA64MMFR1_EL1:	return (u64)read_cpuid(ID_AA64MMFR1_EL1);
-	case SYS_ID_AA64ISAR0_EL1:	return (u64)read_cpuid(ID_AA64ISAR0_EL1);
-	case SYS_ID_AA64ISAR1_EL1:	return (u64)read_cpuid(ID_AA64ISAR1_EL1);
+	case SYS_ID_AA64PFR0_EL1:	return read_cpuid(SYS_ID_AA64PFR0_EL1);
+	case SYS_ID_AA64PFR1_EL1:	return read_cpuid(SYS_ID_AA64PFR0_EL1);
+	case SYS_ID_AA64DFR0_EL1:	return read_cpuid(SYS_ID_AA64DFR0_EL1);
+	case SYS_ID_AA64DFR1_EL1:	return read_cpuid(SYS_ID_AA64DFR0_EL1);
+	case SYS_ID_AA64MMFR0_EL1:	return read_cpuid(SYS_ID_AA64MMFR0_EL1);
+	case SYS_ID_AA64MMFR1_EL1:	return read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	case SYS_ID_AA64ISAR0_EL1:	return read_cpuid(SYS_ID_AA64ISAR0_EL1);
+	case SYS_ID_AA64ISAR1_EL1:	return read_cpuid(SYS_ID_AA64ISAR1_EL1);
 
-	case SYS_CNTFRQ_EL0:		return (u64)read_cpuid(CNTFRQ_EL0);
-	case SYS_CTR_EL0:		return (u64)read_cpuid(CTR_EL0);
-	case SYS_DCZID_EL0:		return (u64)read_cpuid(DCZID_EL0);
+	case SYS_CNTFRQ_EL0:		return read_cpuid(SYS_CNTFRQ_EL0);
+	case SYS_CTR_EL0:		return read_cpuid(SYS_CTR_EL0);
+	case SYS_DCZID_EL0:		return read_cpuid(SYS_DCZID_EL0);
 	default:
 		BUG();
 		return 0;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 212ae6361d8b..76df22272804 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -201,35 +201,35 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 {
 	info->reg_cntfrq = arch_timer_get_cntfrq();
 	info->reg_ctr = read_cpuid_cachetype();
-	info->reg_dczid = read_cpuid(DCZID_EL0);
+	info->reg_dczid = read_cpuid(SYS_DCZID_EL0);
 	info->reg_midr = read_cpuid_id();
 
-	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
-	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
-	info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
-	info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
-	info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
-	info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
-	info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
-	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+	info->reg_id_aa64dfr0 = read_cpuid(SYS_ID_AA64DFR0_EL1);
+	info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1);
+	info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1);
+	info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1);
+	info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1);
+	info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
+	info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1);
 
-	info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
-	info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
-	info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
-	info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
-	info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
-	info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
-	info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
-	info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
-	info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
-	info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
-	info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
-	info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
-	info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+	info->reg_id_dfr0 = read_cpuid(SYS_ID_DFR0_EL1);
+	info->reg_id_isar0 = read_cpuid(SYS_ID_ISAR0_EL1);
+	info->reg_id_isar1 = read_cpuid(SYS_ID_ISAR1_EL1);
+	info->reg_id_isar2 = read_cpuid(SYS_ID_ISAR2_EL1);
+	info->reg_id_isar3 = read_cpuid(SYS_ID_ISAR3_EL1);
+	info->reg_id_isar4 = read_cpuid(SYS_ID_ISAR4_EL1);
+	info->reg_id_isar5 = read_cpuid(SYS_ID_ISAR5_EL1);
+	info->reg_id_mmfr0 = read_cpuid(SYS_ID_MMFR0_EL1);
+	info->reg_id_mmfr1 = read_cpuid(SYS_ID_MMFR1_EL1);
+	info->reg_id_mmfr2 = read_cpuid(SYS_ID_MMFR2_EL1);
+	info->reg_id_mmfr3 = read_cpuid(SYS_ID_MMFR3_EL1);
+	info->reg_id_pfr0 = read_cpuid(SYS_ID_PFR0_EL1);
+	info->reg_id_pfr1 = read_cpuid(SYS_ID_PFR1_EL1);
 
-	info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
-	info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
-	info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+	info->reg_mvfr0 = read_cpuid(SYS_MVFR0_EL1);
+	info->reg_mvfr1 = read_cpuid(SYS_MVFR1_EL1);
+	info->reg_mvfr2 = read_cpuid(SYS_MVFR2_EL1);
 
 	cpuinfo_detect_icache_policy(info);
 
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e87f53ff5f58..7275628ba59f 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -187,7 +187,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 
 static int asids_init(void)
 {
-	int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+	int fld = cpuid_feature_extract_field(read_cpuid(SYS_ID_AA64MMFR0_EL1), 4);
 
 	switch (fld) {
 	default:

From f6c5d808273093c8220da4163c548932d18e8e36 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:47 +0000
Subject: [PATCH 753/797] arm64: add ARMv8.2 id_aa64mmfr2 boiler plate

ARMv8.2 adds a new feature register id_aa64mmfr2. This patch adds the
cpu feature boiler plate used by the actual features in later patches.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 406e308770a92bd33995b2e5b681e86358328bb0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpu.h    |  1 +
 arch/arm64/include/asm/sysreg.h |  4 ++++
 arch/arm64/kernel/cpufeature.c  | 10 ++++++++++
 arch/arm64/kernel/cpuinfo.c     |  1 +
 4 files changed, 16 insertions(+)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index b5e9cee4b5f8..13a6103130cd 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -36,6 +36,7 @@ struct cpuinfo_arm64 {
 	u64		reg_id_aa64isar1;
 	u64		reg_id_aa64mmfr0;
 	u64		reg_id_aa64mmfr1;
+	u64		reg_id_aa64mmfr2;
 	u64		reg_id_aa64pfr0;
 	u64		reg_id_aa64pfr1;
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 76907c94b11f..4bc8655529df 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -70,6 +70,7 @@
 
 #define SYS_ID_AA64MMFR0_EL1		sys_reg(3, 0, 0, 7, 0)
 #define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
+#define SYS_ID_AA64MMFR2_EL1		sys_reg(3, 0, 0, 7, 2)
 
 #define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
 #define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
@@ -135,6 +136,9 @@
 #define ID_AA64MMFR1_VMIDBITS_SHIFT	4
 #define ID_AA64MMFR1_HADBS_SHIFT	0
 
+/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_UAO_SHIFT		4
+
 /* id_aa64dfr0 */
 #define ID_AA64DFR0_CTX_CMPS_SHIFT	28
 #define ID_AA64DFR0_WRPS_SHIFT		20
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 1ef10e784031..42918c797e8e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -123,6 +123,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 	ARM64_FTR_END,
 };
 
+static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
 static struct arm64_ftr_bits ftr_ctr[] = {
 	U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
@@ -284,6 +289,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
 	ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+	ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
 
 	/* Op1 = 3, CRn = 0, CRm = 0 */
 	ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
@@ -408,6 +414,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
 	init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
 	init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
 	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -517,6 +524,8 @@ void update_cpu_features(int cpu,
 				      info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
 	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
 				      info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
+				      info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
 
 	/*
 	 * EL3 is not our concern.
@@ -831,6 +840,7 @@ static u64 __raw_read_system_reg(u32 sys_id)
 	case SYS_ID_AA64DFR1_EL1:	return read_cpuid(SYS_ID_AA64DFR0_EL1);
 	case SYS_ID_AA64MMFR0_EL1:	return read_cpuid(SYS_ID_AA64MMFR0_EL1);
 	case SYS_ID_AA64MMFR1_EL1:	return read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	case SYS_ID_AA64MMFR2_EL1:	return read_cpuid(SYS_ID_AA64MMFR2_EL1);
 	case SYS_ID_AA64ISAR0_EL1:	return read_cpuid(SYS_ID_AA64ISAR0_EL1);
 	case SYS_ID_AA64ISAR1_EL1:	return read_cpuid(SYS_ID_AA64ISAR1_EL1);
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 76df22272804..966fbd52550b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -210,6 +210,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1);
 	info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1);
 	info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1);
 	info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
 	info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1);
 

From 13e05550e107e46ef982e5c4347e4986aeeee7ec Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:48 +0000
Subject: [PATCH 754/797] arm64: kernel: Add support for User Access Override

'User Access Override' is a new ARMv8.2 feature which allows the
unprivileged load and store instructions to be overridden to behave in
the normal way.

This patch converts {get,put}_user() and friends to use ldtr*/sttr*
instructions - so that they can only access EL0 memory, then enables
UAO when fs==KERNEL_DS so that these functions can access kernel memory.

This allows user space's read/write permissions to be checked against the
page tables, instead of testing addr<USER_DS, then using the kernel's
read/write permissions.

Signed-off-by: James Morse <james.morse@arm.com>
[catalin.marinas@arm.com: move uao_thread_switch() above dsb()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 57f4959bad0a154aeca125b7d38d1d9471a12422)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                   | 21 ++++++++
 arch/arm64/include/asm/alternative.h | 72 ++++++++++++++++++++++++++++
 arch/arm64/include/asm/cpufeature.h  |  3 +-
 arch/arm64/include/asm/processor.h   |  1 +
 arch/arm64/include/asm/sysreg.h      |  3 ++
 arch/arm64/include/asm/thread_info.h |  6 +++
 arch/arm64/include/asm/uaccess.h     | 44 ++++++++++++-----
 arch/arm64/include/uapi/asm/ptrace.h |  1 +
 arch/arm64/kernel/cpufeature.c       | 11 +++++
 arch/arm64/kernel/process.c          | 19 ++++++++
 arch/arm64/lib/clear_user.S          |  8 ++--
 arch/arm64/lib/copy_from_user.S      |  8 ++--
 arch/arm64/lib/copy_in_user.S        | 16 +++----
 arch/arm64/lib/copy_to_user.S        |  8 ++--
 arch/arm64/mm/fault.c                | 31 +++++++++---
 15 files changed, 213 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1420102341d0..4df85b5a2045 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -706,6 +706,27 @@ config ARM64_LSE_ATOMICS
 
 endmenu
 
+config ARM64_UAO
+	bool "Enable support for User Access Override (UAO)"
+	default y
+	help
+	  User Access Override (UAO; part of the ARMv8.2 Extensions)
+	  causes the 'unprivileged' variant of the load/store instructions to
+	  be overridden to be privileged.
+
+	  This option changes get_user() and friends to use the 'unprivileged'
+	  variant of the load/store instructions. This ensures that user-space
+	  really did have access to the supplied memory. When addr_limit is
+	  set to kernel memory the UAO bit will be set, allowing privileged
+	  access to kernel memory.
+
+	  Choosing this option will cause copy_to_user() et al to use user-space
+	  memory permissions.
+
+	  The feature is detected at runtime, the kernel will use the
+	  regular load/store instructions if the cpu does not implement the
+	  feature.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index e4962f04201e..a9fc24ec1aa9 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
+#include <asm/cpufeature.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
@@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length);
 
 #else
 
+#include <asm/assembler.h>
+
 .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
 	.word \orig_offset - .
 	.word \alt_offset - .
@@ -136,6 +140,74 @@ void apply_alternatives(void *start, size_t length);
 	alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
 
 
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+	.macro uao_ldp l, reg1, reg2, addr, post_inc
+		alternative_if_not ARM64_HAS_UAO
+8888:			ldp	\reg1, \reg2, [\addr], \post_inc;
+8889:			nop;
+			nop;
+		alternative_else
+			ldtr	\reg1, [\addr];
+			ldtr	\reg2, [\addr, #8];
+			add	\addr, \addr, \post_inc;
+		alternative_endif
+
+		.section __ex_table,"a";
+		.align	3;
+		.quad	8888b,\l;
+		.quad	8889b,\l;
+		.previous;
+	.endm
+
+	.macro uao_stp l, reg1, reg2, addr, post_inc
+		alternative_if_not ARM64_HAS_UAO
+8888:			stp	\reg1, \reg2, [\addr], \post_inc;
+8889:			nop;
+			nop;
+		alternative_else
+			sttr	\reg1, [\addr];
+			sttr	\reg2, [\addr, #8];
+			add	\addr, \addr, \post_inc;
+		alternative_endif
+
+		.section __ex_table,"a";
+		.align	3;
+		.quad	8888b,\l;
+		.quad	8889b,\l;
+		.previous
+	.endm
+
+	.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+		alternative_if_not ARM64_HAS_UAO
+8888:			\inst	\reg, [\addr], \post_inc;
+			nop;
+		alternative_else
+			\alt_inst	\reg, [\addr];
+			add		\addr, \addr, \post_inc;
+		alternative_endif
+
+		.section __ex_table,"a";
+		.align	3;
+		.quad	8888b,\l;
+		.previous
+	.endm
+#else
+	.macro uao_ldp l, reg1, reg2, addr, post_inc
+		USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+	.endm
+	.macro uao_stp l, reg1, reg2, addr, post_inc
+		USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+	.endm
+	.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+		USER(\l, \inst \reg, [\addr], \post_inc)
+	.endm
+#endif
+
 #endif  /*  __ASSEMBLY__  */
 
 /*
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8131abfabb0a..a5df7cde616b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -31,8 +31,9 @@
 #define ARM64_WORKAROUND_CAVIUM_23154		6
 #define ARM64_WORKAROUND_834220			7
 #define ARM64_HAS_NO_HW_PREFETCH		8
+#define ARM64_HAS_UAO				9
 
-#define ARM64_NCAPS				9
+#define ARM64_NCAPS				10
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5bb1d763d17a..cef1cf398356 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -191,5 +191,6 @@ static inline void spin_lock_prefetch(const void *ptr)
 #endif
 
 void cpu_enable_pan(void *__unused);
+void cpu_enable_uao(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4bc8655529df..b9fd8ec79033 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -77,9 +77,12 @@
 #define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)
 
 #define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
 
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
+				     (!!x)<<8 | 0x1f)
 
 /* SCTLR_EL1 */
 #define SCTLR_EL1_CP15BEN	(0x1 << 5)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index abd64bd1f6d9..eba8db6838af 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -85,6 +85,12 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)sp_el0;
 }
 
+/* Access struct thread_info of another thread */
+static inline struct thread_info *get_thread_info(unsigned long thread_stack)
+{
+	return (struct thread_info *)(thread_stack & ~(THREAD_SIZE - 1));
+}
+
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.pc))
 #define thread_saved_sp(tsk)	\
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b2ede967fe7d..f973bdce8410 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -64,6 +64,16 @@ extern int fixup_exception(struct pt_regs *regs);
 static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
+
+	/*
+	 * Enable/disable UAO so that copy_to_user() etc can access
+	 * kernel memory with the unprivileged instructions.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+	else
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+				CONFIG_ARM64_UAO));
 }
 
 #define segment_eq(a, b)	((a) == (b))
@@ -113,9 +123,10 @@ static inline void set_fs(mm_segment_t fs)
  * The "__xxx_error" versions set the third argument to -EFAULT if an error
  * occurs, and leave it unchanged on success.
  */
-#define __get_user_asm(instr, reg, x, addr, err)			\
+#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature)	\
 	asm volatile(							\
-	"1:	" instr "	" reg "1, [%2]\n"			\
+	"1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",			\
+			alt_instr " " reg "1, [%2]\n", feature)		\
 	"2:\n"								\
 	"	.section .fixup, \"ax\"\n"				\
 	"	.align	2\n"						\
@@ -138,16 +149,20 @@ do {									\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__get_user_asm("ldrb", "%w", __gu_val, (ptr), (err));	\
+		__get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr),  \
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 2:								\
-		__get_user_asm("ldrh", "%w", __gu_val, (ptr), (err));	\
+		__get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr),  \
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 4:								\
-		__get_user_asm("ldr", "%w", __gu_val, (ptr), (err));	\
+		__get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__get_user_asm("ldr", "%",  __gu_val, (ptr), (err));	\
+		__get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
 		BUILD_BUG();						\
@@ -181,9 +196,10 @@ do {									\
 		((x) = 0, -EFAULT);					\
 })
 
-#define __put_user_asm(instr, reg, x, addr, err)			\
+#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature)	\
 	asm volatile(							\
-	"1:	" instr "	" reg "1, [%2]\n"			\
+	"1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",			\
+			alt_instr " " reg "1, [%2]\n", feature)		\
 	"2:\n"								\
 	"	.section .fixup,\"ax\"\n"				\
 	"	.align	2\n"						\
@@ -205,16 +221,20 @@ do {									\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__put_user_asm("strb", "%w", __pu_val, (ptr), (err));	\
+		__put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 2:								\
-		__put_user_asm("strh", "%w", __pu_val, (ptr), (err));	\
+		__put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 4:								\
-		__put_user_asm("str",  "%w", __pu_val, (ptr), (err));	\
+		__put_user_asm("str", "sttr", "%w", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__put_user_asm("str",  "%", __pu_val, (ptr), (err));	\
+		__put_user_asm("str", "sttr", "%", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
 		BUILD_BUG();						\
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 208db3df135a..b5c3933ed441 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -45,6 +45,7 @@
 #define PSR_A_BIT	0x00000100
 #define PSR_D_BIT	0x00000200
 #define PSR_PAN_BIT	0x00400000
+#define PSR_UAO_BIT	0x00800000
 #define PSR_Q_BIT	0x08000000
 #define PSR_V_BIT	0x10000000
 #define PSR_C_BIT	0x20000000
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 42918c797e8e..ae22edf9d3c9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -677,6 +677,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_NO_HW_PREFETCH,
 		.matches = has_no_hw_prefetch,
 	},
+#ifdef CONFIG_ARM64_UAO
+	{
+		.desc = "User Access Override",
+		.capability = ARM64_HAS_UAO,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR2_EL1,
+		.field_pos = ID_AA64MMFR2_UAO_SHIFT,
+		.min_field_value = 1,
+		.enable = cpu_enable_uao,
+	},
+#endif /* CONFIG_ARM64_UAO */
 	{},
 };
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 88d742ba19d5..c1ca4ea065d4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -46,6 +46,7 @@
 #include <linux/notifier.h>
 #include <trace/events/power.h>
 
+#include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
 #include <asm/fpsimd.h>
@@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
+		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+		    cpus_have_cap(ARM64_HAS_UAO))
+			childregs->pstate |= PSR_UAO_BIT;
 		p->thread.cpu_context.x19 = stack_start;
 		p->thread.cpu_context.x20 = stk_sz;
 	}
@@ -308,6 +312,20 @@ static void tls_thread_switch(struct task_struct *next)
 	: : "r" (tpidr), "r" (tpidrro));
 }
 
+/* Restore the UAO state depending on next's addr_limit */
+static void uao_thread_switch(struct task_struct *next)
+{
+	unsigned long next_sp = next->thread.cpu_context.sp;
+
+	if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+	    get_thread_info(next_sp)->addr_limit == KERNEL_DS)
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO,
+			        CONFIG_ARM64_UAO));
+	else
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+				CONFIG_ARM64_UAO));
+}
+
 /*
  * Thread switching.
  */
@@ -320,6 +338,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	tls_thread_switch(next);
 	hw_breakpoint_thread_switch(next);
 	contextidr_thread_switch(next);
+	uao_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index a9723c71c52b..3f950b677c07 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -39,20 +39,20 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	subs	x1, x1, #8
 	b.mi	2f
 1:
-USER(9f, str	xzr, [x0], #8	)
+uao_user_alternative 9f, str, sttr, xzr, x0, 8
 	subs	x1, x1, #8
 	b.pl	1b
 2:	adds	x1, x1, #4
 	b.mi	3f
-USER(9f, str	wzr, [x0], #4	)
+uao_user_alternative 9f, str, sttr, wzr, x0, 4
 	sub	x1, x1, #4
 3:	adds	x1, x1, #2
 	b.mi	4f
-USER(9f, strh	wzr, [x0], #2	)
+uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
 	sub	x1, x1, #2
 4:	adds	x1, x1, #1
 	b.mi	5f
-USER(9f, strb	wzr, [x0]	)
+uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:	mov	x0, #0
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4699cd74f87e..1d982d64f1a7 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -34,7 +34,7 @@
  */
 
 	.macro ldrb1 ptr, regB, val
-	USER(9998f, ldrb  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
 	.endm
 
 	.macro strb1 ptr, regB, val
@@ -42,7 +42,7 @@
 	.endm
 
 	.macro ldrh1 ptr, regB, val
-	USER(9998f, ldrh  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
 	.endm
 
 	.macro strh1 ptr, regB, val
@@ -50,7 +50,7 @@
 	.endm
 
 	.macro ldr1 ptr, regB, val
-	USER(9998f, ldr \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
 	.endm
 
 	.macro str1 ptr, regB, val
@@ -58,7 +58,7 @@
 	.endm
 
 	.macro ldp1 ptr, regB, regC, val
-	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	uao_ldp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 	.macro stp1 ptr, regB, regC, val
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 81c8fc93c100..feaad1520dc1 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -35,35 +35,35 @@
  *	x0 - bytes not copied
  */
 	.macro ldrb1 ptr, regB, val
-	USER(9998f, ldrb  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
 	.endm
 
 	.macro strb1 ptr, regB, val
-	USER(9998f, strb \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
 	.endm
 
 	.macro ldrh1 ptr, regB, val
-	USER(9998f, ldrh  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
 	.endm
 
 	.macro strh1 ptr, regB, val
-	USER(9998f, strh \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
 	.endm
 
 	.macro ldr1 ptr, regB, val
-	USER(9998f, ldr \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
 	.endm
 
 	.macro str1 ptr, regB, val
-	USER(9998f, str \ptr, [\regB], \val)
+	uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
 	.endm
 
 	.macro ldp1 ptr, regB, regC, val
-	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	uao_ldp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 	.macro stp1 ptr, regB, regC, val
-	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	uao_stp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 end	.req	x5
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7512bbbc07ac..2dae2cd2c481 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -37,7 +37,7 @@
 	.endm
 
 	.macro strb1 ptr, regB, val
-	USER(9998f, strb \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
 	.endm
 
 	.macro ldrh1 ptr, regB, val
@@ -45,7 +45,7 @@
 	.endm
 
 	.macro strh1 ptr, regB, val
-	USER(9998f, strh \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
 	.endm
 
 	.macro ldr1 ptr, regB, val
@@ -53,7 +53,7 @@
 	.endm
 
 	.macro str1 ptr, regB, val
-	USER(9998f, str \ptr, [\regB], \val)
+	uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
 	.endm
 
 	.macro ldp1 ptr, regB, regC, val
@@ -61,7 +61,7 @@
 	.endm
 
 	.macro stp1 ptr, regB, regC, val
-	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	uao_stp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 end	.req	x5
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 92ddac1e8ca2..820d47353cf0 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -192,6 +192,14 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	return fault;
 }
 
+static inline int permission_fault(unsigned int esr)
+{
+	unsigned int ec       = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+}
+
 static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 				   struct pt_regs *regs)
 {
@@ -225,12 +233,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 		mm_flags |= FAULT_FLAG_WRITE;
 	}
 
-	/*
-	 * PAN bit set implies the fault happened in kernel space, but not
-	 * in the arch's user access functions.
-	 */
-	if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
-		goto no_context;
+	if (permission_fault(esr) && (addr < USER_DS)) {
+		if (!search_exception_tables(regs->pc))
+			panic("Accessing user space memory outside uaccess.h routines");
+	}
 
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
@@ -561,3 +567,16 @@ void cpu_enable_pan(void *__unused)
 	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 }
 #endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(), devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+void cpu_enable_uao(void *__unused)
+{
+	asm(SET_PSTATE_UAO(1));
+}
+#endif /* CONFIG_ARM64_UAO */

From 31b28ec1f1051bf82515fe38d0ae1884f40783cf Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:49 +0000
Subject: [PATCH 755/797] arm64: cpufeature: Test 'matches' pointer to find the
 end of the list

CPU feature code uses the desc field as a test to find the end of the list;
this means every entry must have a description. This generates noise for
entries in the list that aren't really features, but combinations of them.
e.g.
> CPU features: detected feature: Privileged Access Never
> CPU features: detected feature: PAN and not UAO

These combination features are needed for corner cases with alternatives,
where cpu features interact.

Change all walkers of the arm64_features[] and arm64_hwcaps[] lists to test
'matches' not 'desc', and only print 'desc' if it is non-NULL.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 644c2ae198412c956700e55a2acf80b2541f6aa5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/cpufeature.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index ae22edf9d3c9..9cc8186cd14b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -771,7 +771,7 @@ static void __init setup_cpu_hwcaps(void)
 	int i;
 	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
 
-	for (i = 0; hwcaps[i].desc; i++)
+	for (i = 0; hwcaps[i].matches; i++)
 		if (hwcaps[i].matches(&hwcaps[i]))
 			cap_set_hwcap(&hwcaps[i]);
 }
@@ -781,11 +781,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 {
 	int i;
 
-	for (i = 0; caps[i].desc; i++) {
+	for (i = 0; caps[i].matches; i++) {
 		if (!caps[i].matches(&caps[i]))
 			continue;
 
-		if (!cpus_have_cap(caps[i].capability))
+		if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
 			pr_info("%s %s\n", info, caps[i].desc);
 		cpus_set_cap(caps[i].capability);
 	}
@@ -800,7 +800,7 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
-	for (i = 0; caps[i].desc; i++)
+	for (i = 0; caps[i].matches; i++)
 		if (caps[i].enable && cpus_have_cap(caps[i].capability))
 			on_each_cpu(caps[i].enable, NULL, true);
 }
@@ -907,7 +907,7 @@ void verify_local_cpu_capabilities(void)
 		return;
 
 	caps = arm64_features;
-	for (i = 0; caps[i].desc; i++) {
+	for (i = 0; caps[i].matches; i++) {
 		if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
 			continue;
 		/*
@@ -920,7 +920,7 @@ void verify_local_cpu_capabilities(void)
 			caps[i].enable(NULL);
 	}
 
-	for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+	for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
 		if (!cpus_have_hwcap(&caps[i]))
 			continue;
 		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))

From cdfec5aaf4a886521b7f54dfe2db61735558d546 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:50 +0000
Subject: [PATCH 756/797] arm64: kernel: Don't toggle PAN on systems with UAO

If a CPU supports both Privileged Access Never (PAN) and User Access
Override (UAO), we don't need to disable/re-enable PAN around all
copy_to_user()-like calls.

UAO alternatives cause these calls to use the 'unprivileged' load/store
instructions, which are overridden to be the privileged kind when
fs==KERNEL_DS.

This patch changes the copy_to_user() calls to have their PAN toggling
depend on a new composite 'feature' ARM64_ALT_PAN_NOT_UAO.

If both features are detected, PAN will be enabled, but the copy_to_user()
alternatives will not be applied. This means PAN will be enabled all the
time for these functions. If only PAN is detected, the toggling will be
enabled as normal.

This will save the time taken to disable/re-enable PAN, and allow us to
catch copy_to_user() accesses that occur with fs==KERNEL_DS.

Futex and swp-emulation code continue to hang their PAN toggling code on
ARM64_HAS_PAN.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 705441960033e66b63524521f153fbb28c99ddbd)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpufeature.h |  3 ++-
 arch/arm64/include/asm/uaccess.h    |  8 ++++----
 arch/arm64/kernel/cpufeature.c      | 16 ++++++++++++++++
 arch/arm64/lib/clear_user.S         |  4 ++--
 arch/arm64/lib/copy_from_user.S     |  4 ++--
 arch/arm64/lib/copy_in_user.S       |  4 ++--
 arch/arm64/lib/copy_to_user.S       |  4 ++--
 arch/arm64/mm/fault.c               |  3 +++
 8 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index a5df7cde616b..37a53fc6b384 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -32,8 +32,9 @@
 #define ARM64_WORKAROUND_834220			7
 #define ARM64_HAS_NO_HW_PREFETCH		8
 #define ARM64_HAS_UAO				9
+#define ARM64_ALT_PAN_NOT_UAO			10
 
-#define ARM64_NCAPS				10
+#define ARM64_NCAPS				11
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index f973bdce8410..16ba0d5c9740 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -145,7 +145,7 @@ static inline void set_fs(mm_segment_t fs)
 do {									\
 	unsigned long __gu_val;						\
 	__chk_user_ptr(ptr);						\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
@@ -168,7 +168,7 @@ do {									\
 		BUILD_BUG();						\
 	}								\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 } while (0)
 
@@ -217,7 +217,7 @@ do {									\
 do {									\
 	__typeof__(*(ptr)) __pu_val = (x);				\
 	__chk_user_ptr(ptr);						\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
@@ -239,7 +239,7 @@ do {									\
 	default:							\
 		BUILD_BUG();						\
 	}								\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 } while (0)
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9cc8186cd14b..7566cad9fa1d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -67,6 +67,10 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 		.width = 0,				\
 	}
 
+/* meta feature for alternatives */
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+
 static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
@@ -688,6 +692,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.enable = cpu_enable_uao,
 	},
 #endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+	{
+		.capability = ARM64_ALT_PAN_NOT_UAO,
+		.matches = cpufeature_pan_not_uao,
+	},
+#endif /* CONFIG_ARM64_PAN */
 	{},
 };
 
@@ -966,3 +976,9 @@ void __init setup_cpu_features(void)
 		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
 			L1_CACHE_BYTES, cls);
 }
+
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+{
+	return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+}
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 3f950b677c07..5d1cad3ce6d6 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -33,7 +33,7 @@
  * Alignment fixed up by hardware.
  */
 ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x2, x1			// save the size for fixup return
 	subs	x1, x1, #8
@@ -54,7 +54,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
 	b.mi	5f
 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:	mov	x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	ret
 ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 1d982d64f1a7..17e8306dca29 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -67,11 +67,11 @@
 
 end	.req	x5
 ENTRY(__copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x0, #0				// Nothing to copy
 	ret
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index feaad1520dc1..f7292dd08c84 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -68,11 +68,11 @@
 
 end	.req	x5
 ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x0, #0
 	ret
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 2dae2cd2c481..21faae60f988 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -66,11 +66,11 @@
 
 end	.req	x5
 ENTRY(__copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x0, #0
 	ret
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 820d47353cf0..d0762a729d01 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -234,6 +234,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	}
 
 	if (permission_fault(esr) && (addr < USER_DS)) {
+		if (get_thread_info(regs->sp)->addr_limit == KERNEL_DS)
+			panic("Accessing user space memory with fs=KERNEL_DS");
+
 		if (!search_exception_tables(regs->pc))
 			panic("Accessing user space memory outside uaccess.h routines");
 	}

From 9193df45aa67b769ee8af97609537a876a81baca Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 18 Feb 2016 15:50:04 +0000
Subject: [PATCH 757/797] arm64: Remove the get_thread_info() function

This function was introduced by previous commits implementing UAO.
However, it can be replaced with task_thread_info() in
uao_thread_switch() or get_fs() in do_page_fault() (the latter being
called only on the current context, so no need for using the saved
pt_regs).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit e950631e84e7e38892ffbeee5e1816b270026b0e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/thread_info.h |  6 ------
 arch/arm64/kernel/process.c          | 15 ++++++---------
 arch/arm64/mm/fault.c                |  2 +-
 3 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index eba8db6838af..abd64bd1f6d9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -85,12 +85,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)sp_el0;
 }
 
-/* Access struct thread_info of another thread */
-static inline struct thread_info *get_thread_info(unsigned long thread_stack)
-{
-	return (struct thread_info *)(thread_stack & ~(THREAD_SIZE - 1));
-}
-
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.pc))
 #define thread_saved_sp(tsk)	\
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c1ca4ea065d4..80624829db61 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -315,15 +315,12 @@ static void tls_thread_switch(struct task_struct *next)
 /* Restore the UAO state depending on next's addr_limit */
 static void uao_thread_switch(struct task_struct *next)
 {
-	unsigned long next_sp = next->thread.cpu_context.sp;
-
-	if (IS_ENABLED(CONFIG_ARM64_UAO) &&
-	    get_thread_info(next_sp)->addr_limit == KERNEL_DS)
-		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO,
-			        CONFIG_ARM64_UAO));
-	else
-		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
-				CONFIG_ARM64_UAO));
+	if (IS_ENABLED(CONFIG_ARM64_UAO)) {
+		if (task_thread_info(next)->addr_limit == KERNEL_DS)
+			asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+		else
+			asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
+	}
 }
 
 /*
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index d0762a729d01..a8eafeceb08a 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -234,7 +234,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	}
 
 	if (permission_fault(esr) && (addr < USER_DS)) {
-		if (get_thread_info(regs->sp)->addr_limit == KERNEL_DS)
+		if (get_fs() == KERNEL_DS)
 			panic("Accessing user space memory with fs=KERNEL_DS");
 
 		if (!search_exception_tables(regs->pc))

From c73cfcaf47182e53c779181031f20e6d3793e07b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:32 +0100
Subject: [PATCH 758/797] of/fdt: make memblock minimum physical address arch
 configurable

By default, early_init_dt_add_memory_arch() ignores memory below
the base of the kernel image since it won't be addressable via the
linear mapping. However, this is not appropriate anymore once we
decouple the kernel text mapping from the linear mapping, so archs
may want to drop the low limit entirely. So allow the minimum to be
overridden by setting MIN_MEMBLOCK_ADDR.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 270522a04f7a9911983878fa37da467f9ff1c938)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/of/fdt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 655f79db7899..1f98156f8996 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -976,13 +976,16 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK
+#ifndef MIN_MEMBLOCK_ADDR
+#define MIN_MEMBLOCK_ADDR	__pa(PAGE_OFFSET)
+#endif
 #ifndef MAX_MEMBLOCK_ADDR
 #define MAX_MEMBLOCK_ADDR	((phys_addr_t)~0)
 #endif
 
 void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 {
-	const u64 phys_offset = __pa(PAGE_OFFSET);
+	const u64 phys_offset = MIN_MEMBLOCK_ADDR;
 
 	if (!PAGE_ALIGNED(base)) {
 		if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {

From b01c68c7494903dca326579248d3757c715b84f8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:33 +0100
Subject: [PATCH 759/797] of/fdt: factor out assignment of
 initrd_start/initrd_end

Since architectures may not yet have their linear mapping up and running
when the initrd address is discovered from the DT, factor out the
assignment of initrd_start and initrd_end, so that an architecture can
override it and use the translation it needs.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 369bc9abf22bf026e8645a4dd746b90649a2f6ee)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/of/fdt.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 1f98156f8996..3e90bce70545 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -760,6 +760,16 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
+#ifndef __early_init_dt_declare_initrd
+static void __early_init_dt_declare_initrd(unsigned long start,
+					   unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
  * @node: reference to node containing initrd location ('chosen')
@@ -782,9 +792,7 @@ static void __init early_init_dt_check_for_initrd(unsigned long node)
 		return;
 	end = of_read_number(prop, len/4);
 
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
+	__early_init_dt_declare_initrd(start, end);
 
 	pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n",
 		 (unsigned long long)start, (unsigned long long)end);

From 2894f328e3bbb808606c82fa1e2ea300089df728 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:34 +0100
Subject: [PATCH 760/797] arm64: prevent potential circular header dependencies
 in asm/bug.h

Currently, using BUG_ON() in header files is cumbersome, due to the fact
that asm/bug.h transitively includes a lot of other header files, resulting
in the actual BUG_ON() invocation appearing before its definition in the
preprocessor input. So let's reverse the #include dependency between
asm/bug.h and asm/debug-monitors.h, by moving the definition of BUG_BRK_IMM
from the latter to the former. Also fix up one user of asm/debug-monitors.h
which relied on a transitive include.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 03336b1df9929e5d9c28fd9768948b6151cb046c)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	skip arch/arm64/kvm/hyp/debug-sr.c
---
 arch/arm64/include/asm/bug.h            | 2 +-
 arch/arm64/include/asm/debug-monitors.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 4a748ce9ba1a..679d49221998 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,7 +18,7 @@
 #ifndef _ARCH_ARM64_ASM_BUG_H
 #define _ARCH_ARM64_ASM_BUG_H
 
-#include <asm/debug-monitors.h>
+#define BUG_BRK_IMM			0x800
 
 #ifdef CONFIG_GENERIC_BUG
 #define HAVE_ARCH_BUG
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 279c85b5ec09..e893a1fca9c2 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,6 +20,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <asm/bug.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
 #include <asm/ptrace.h>
@@ -57,7 +58,6 @@
 #define FAULT_BRK_IMM			0x100
 #define KGDB_DYN_DBG_BRK_IMM		0x400
 #define KGDB_COMPILED_DBG_BRK_IMM	0x401
-#define BUG_BRK_IMM			0x800
 
 /*
  * BRK instruction encoding

From 37cbc7db8e4fa9b66e15cf8661383a6b51c9a3e7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:35 +0100
Subject: [PATCH 761/797] arm64: add support for ioremap() block mappings

This wires up the existing generic huge-vmap feature, which allows
ioremap() to use PMD or PUD sized block mappings. It also adds support
to the unmap path for dealing with block mappings, which will allow us
to unmap the __init region using unmap_kernel_range() in a subsequent
patch.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 324420bf91f60582bb481133db9547111768ef17)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 .../features/vm/huge-vmap/arch-support.txt    |  2 +-
 arch/arm64/Kconfig                            |  1 +
 arch/arm64/include/asm/memory.h               |  6 +++
 arch/arm64/mm/mmu.c                           | 41 +++++++++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index af6816bccb43..df1d1f3c9af2 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |       avr32: | TODO |
     |    blackfin: | TODO |
     |         c6x: | TODO |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4df85b5a2045..8cd8d06ece4a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -49,6 +49,7 @@ config ARM64
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
+	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
 	select HAVE_ARCH_KGDB
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 853953cd1f08..c65aad7b13dc 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -100,6 +100,12 @@
 #define MT_S2_NORMAL		0xf
 #define MT_S2_DEVICE_nGnRE	0x1
 
+#ifdef CONFIG_ARM64_4K_PAGES
+#define IOREMAP_MAX_ORDER	(PUD_SHIFT)
+#else
+#define IOREMAP_MAX_ORDER	(PMD_SHIFT)
+#endif
+
 #ifndef __ASSEMBLY__
 
 extern phys_addr_t		memstart_addr;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d1fa678355c9..b4afa9fbb00f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -745,3 +745,44 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 
 	return dt_virt;
 }
+
+int __init arch_ioremap_pud_supported(void)
+{
+	/* only 4k granule supports level 1 block mappings */
+	return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+	return 1;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+	BUG_ON(phys & ~PUD_MASK);
+	set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+	BUG_ON(phys & ~PMD_MASK);
+	set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+	if (!pud_sect(*pud))
+		return 0;
+	pud_clear(pud);
+	return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (!pmd_sect(*pmd))
+		return 0;
+	pmd_clear(pmd);
+	return 1;
+}

From 1dd59fe47656335cd3c913e378718eb49b7b1b38 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:36 +0100
Subject: [PATCH 762/797] arm64: introduce KIMAGE_VADDR as the virtual base of
 the kernel region

This introduces the preprocessor symbol KIMAGE_VADDR which will serve as
the symbolic virtual base of the kernel region, i.e., the kernel's virtual
offset will be KIMAGE_VADDR + TEXT_OFFSET. For now, we define it as being
equal to PAGE_OFFSET, but in the future, it will be moved below it once
we move the kernel virtual mapping out of the linear mapping.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit ab893fb9f1b17f02139bce547bb4b69e96b9ae16)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 10 ++++++++--
 arch/arm64/kernel/head.S        |  2 +-
 arch/arm64/kernel/vmlinux.lds.S |  4 ++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index c65aad7b13dc..aebc739f5a11 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -51,7 +51,8 @@
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define MODULES_END		(PAGE_OFFSET)
+#define KIMAGE_VADDR		(PAGE_OFFSET)
+#define MODULES_END		(KIMAGE_VADDR)
 #define MODULES_VADDR		(MODULES_END - SZ_64M)
 #define PCI_IO_END		(MODULES_VADDR - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
@@ -75,8 +76,13 @@
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
  */
-#define __virt_to_phys(x)	(((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
+#define __virt_to_phys(x) ({						\
+	phys_addr_t __x = (phys_addr_t)(x);				\
+	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
+			     (__x - KIMAGE_VADDR + PHYS_OFFSET); })
+
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __phys_to_kimg(x)	((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR))
 
 /*
  * Convert a page to/from a physical address
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 53b9f9f128c2..04d38a058b19 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -389,7 +389,7 @@ __create_page_tables:
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
 	mov	x0, x26				// swapper_pg_dir
-	mov	x5, #PAGE_OFFSET
+	ldr	x5, =KIMAGE_VADDR
 	create_pgd_entry x0, x5, x3, x6
 	ldr	x6, =KERNEL_END			// __va(KERNEL_END)
 	mov	x3, x24				// phys offset
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index b78a3c772294..282e3e64a17e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -89,7 +89,7 @@ SECTIONS
 		*(.discard.*)
 	}
 
-	. = PAGE_OFFSET + TEXT_OFFSET;
+	. = KIMAGE_VADDR + TEXT_OFFSET;
 
 	.head.text : {
 		_text = .;
@@ -186,4 +186,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
  */
-ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")

From 4545faf8e5b81592ba597141d432ca7e2b52a43e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:37 +0100
Subject: [PATCH 763/797] arm64: pgtable: implement static [pte|pmd|pud]_offset
 variants

The page table accessors pte_offset(), pud_offset() and pmd_offset()
rely on __va translations, so they can only be used after the linear
mapping has been installed. For the early fixmap and kasan init routines,
whose page tables are allocated statically in the kernel image, these
functions will return bogus values. So implement pte_offset_kimg(),
pmd_offset_kimg() and pud_offset_kimg(), which can be used instead
before any page tables have been allocated dynamically.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6533945a32c762c5db70d7a3ec251a040b2d9661)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c99dfc588deb..9a560b368910 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -455,6 +455,9 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -498,6 +501,9 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
+
 #else
 
 #define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
@@ -507,6 +513,8 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
 #define pmd_clear_fixmap()
 
+#define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)
+
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -545,6 +553,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pud_offset_kimg(dir,addr)	((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+
 #else
 
 #define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
@@ -554,6 +565,8 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
 #define pud_clear_fixmap()
 
+#define pud_offset_kimg(dir,addr)	((pud_t *)dir)
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))

From ade984b5fcdda640ca25a1606f9cdaf279e8b4c7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:38 +0100
Subject: [PATCH 764/797] arm64: decouple early fixmap init from linear mapping

Since the early fixmap page tables are populated using pages that are
part of the static footprint of the kernel, they are covered by the
initial kernel mapping, and we can refer to them without using __va/__pa
translations, which are tied to the linear mapping.

Since the fixmap page tables are disjoint from the kernel mapping up
to the top level pgd entry, we can refer to bm_pte[] directly, and there
is no need to walk the page tables and perform __pa()/__va() translations
at each step.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 157962f5a8f236cab898b68bdaa69ce68922f0bf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b4afa9fbb00f..0f58a45df1f3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -614,7 +614,7 @@ static inline pud_t * fixmap_pud(unsigned long addr)
 
 	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
 
-	return pud_offset(pgd, addr);
+	return pud_offset_kimg(pgd, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -623,16 +623,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
 
 	BUG_ON(pud_none(*pud) || pud_bad(*pud));
 
-	return pmd_offset(pud, addr);
+	return pmd_offset_kimg(pud, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
 {
-	pmd_t *pmd = fixmap_pmd(addr);
-
-	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-	return pte_offset_kernel(pmd, addr);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_fixmap_init(void)
@@ -644,14 +640,14 @@ void __init early_fixmap_init(void)
 
 	pgd = pgd_offset_k(addr);
 	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = pud_offset(pgd, addr);
+	pud = fixmap_pud(addr);
 	pud_populate(&init_mm, pud, bm_pmd);
-	pmd = pmd_offset(pud, addr);
+	pmd = fixmap_pmd(addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
-	 * we are not preparted:
+	 * we are not prepared:
 	 */
 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

From 44b9620e6822e2acb0d65507d89cd0658055843c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:39 +0100
Subject: [PATCH 765/797] arm64: kvm: deal with kernel symbols outside of
 linear mapping

KVM on arm64 uses a fixed offset between the linear mapping at EL1 and
the HYP mapping at EL2. Before we can move the kernel virtual mapping
out of the linear mapping, we have to make sure that references to kernel
symbols that are accessed via the HYP mapping are translated to their
linear equivalent.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a0bf9776cd0be4490d4675d4108e13379849fc7f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	skip the new create_hyp_mappings(__start_rodata, ...) call in
	arch/arm/kvm/arm.c and keep the functions in arch/arm64/kvm/hyp.S
---
 arch/arm/include/asm/kvm_asm.h    |  2 ++
 arch/arm/kvm/arm.c                |  5 +++--
 arch/arm64/include/asm/kvm_asm.h  | 17 +++++++++++++++++
 arch/arm64/include/asm/kvm_host.h |  8 +++++---
 arch/arm64/kvm/hyp.S              |  6 +++---
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 194c91b610ff..c35c349da069 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -79,6 +79,8 @@
 #define rr_lo_hi(a1, a2) a1, a2
 #endif
 
+#define kvm_ksym_ref(kva)	(kva)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e06fd299de08..70e6d557c75f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -969,7 +969,7 @@ static void cpu_init_hyp_mode(void *dummy)
 	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
-	vector_ptr = (unsigned long)__kvm_hyp_vector;
+	vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
 
 	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
 
@@ -1061,7 +1061,8 @@ static int init_hyp_mode(void)
 	/*
 	 * Map the Hyp-code called directly from the host
 	 */
-	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
+	err = create_hyp_mappings(kvm_ksym_ref(__kvm_hyp_code_start),
+				  kvm_ksym_ref(__kvm_hyp_code_end));
 	if (err) {
 		kvm_err("Cannot map world-switch code\n");
 		goto out_free_mappings;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 5e377101f919..e95c39543629 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -102,7 +102,24 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+#define kvm_ksym_ref(sym)		((void *)&sym + kvm_ksym_shift)
+
 #ifndef __ASSEMBLY__
+#if __GNUC__ > 4
+#define kvm_ksym_shift			(PAGE_OFFSET - KIMAGE_VADDR)
+#else
+/*
+ * GCC versions 4.9 and older will fold the constant below into the addend of
+ * the reference to 'sym' above if kvm_ksym_shift is declared static or if the
+ * constant is used directly. However, since we use the small code model for
+ * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair,
+ * with a +/- 4 GB range, resulting in linker relocation errors if the shift
+ * is sufficiently large. So prevent the compiler from folding the shift into
+ * the addend, by making the shift a variable with external linkage.
+ */
+__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR;
+#endif
+
 struct kvm;
 struct kvm_vcpu;
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a35ce7266aac..90c6368ad7c8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -222,7 +222,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
-u64 kvm_call_hyp(void *hypfn, ...);
+u64 __kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
@@ -243,8 +243,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	 * Call initialization code, and switch to the full blown
 	 * HYP code.
 	 */
-	kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
-		     hyp_stack_ptr, vector_ptr);
+	__kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+		       hyp_stack_ptr, vector_ptr);
 }
 
 static inline void kvm_arch_hardware_disable(void) {}
@@ -258,4 +258,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
 
+#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 86c289832272..309e3479dc2c 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -923,7 +923,7 @@ __hyp_panic_str:
 	.align	2
 
 /*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * u64 __kvm_call_hyp(void *hypfn, ...);
  *
  * This is not really a variadic function in the classic C-way and care must
  * be taken when calling this to ensure parameters are passed in registers
@@ -940,10 +940,10 @@ __hyp_panic_str:
  * used to implement __hyp_get_vectors in the same way as in
  * arch/arm64/kernel/hyp_stub.S.
  */
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
 	hvc	#0
 	ret
-ENDPROC(kvm_call_hyp)
+ENDPROC(__kvm_call_hyp)
 
 .macro invalid_vector	label, target
 	.align	2

From 49d5b2c298815fcaee00779e69a741ad1e80e740 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:40 +0100
Subject: [PATCH 766/797] arm64: move kernel image to base of vmalloc area

This moves the module area to right before the vmalloc area, and moves
the kernel image to the base of the vmalloc area. This is an intermediate
step towards implementing KASLR, which allows the kernel image to be
located anywhere in the vmalloc area.

Since other subsystems such as hibernate may still need to refer to the
kernel text or data segments via their linear addresses, both are mapped
in the linear region as well. The linear alias of the text region is
mapped read-only/non-executable to prevent inadvertent modification or
execution.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f9040773b7bbbd9e98eb6184a263512a7cfc133f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/kasan.h   |   2 +-
 arch/arm64/include/asm/memory.h  |  21 ++++--
 arch/arm64/include/asm/pgtable.h |  10 +--
 arch/arm64/mm/dump.c             |  12 ++--
 arch/arm64/mm/init.c             |  23 +++----
 arch/arm64/mm/kasan_init.c       |  27 +++++++-
 arch/arm64/mm/mmu.c              | 110 +++++++++++++++++++++----------
 7 files changed, 137 insertions(+), 68 deletions(-)

diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index de0d21211c34..71ad0f93eb71 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -14,7 +14,7 @@
  * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
  */
 #define KASAN_SHADOW_START      (VA_START)
-#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
 
 /*
  * This value is used to map an address to the corresponding shadow
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index aebc739f5a11..4388651d1f0d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -45,16 +45,15 @@
  * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define KIMAGE_VADDR		(PAGE_OFFSET)
-#define MODULES_END		(KIMAGE_VADDR)
-#define MODULES_VADDR		(MODULES_END - SZ_64M)
-#define PCI_IO_END		(MODULES_VADDR - SZ_2M)
+#define KIMAGE_VADDR		(MODULES_END)
+#define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VSIZE		(SZ_64M)
+#define PCI_IO_END		(PAGE_OFFSET - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
 #define TASK_SIZE_64		(UL(1) << VA_BITS)
@@ -71,6 +70,16 @@
 
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
+/*
+ * The size of the KASAN shadow region. This should be 1/8th of the
+ * size of the entire kernel virtual address space.
+ */
+#ifdef CONFIG_KASAN
+#define KASAN_SHADOW_SIZE	(UL(1) << (VA_BITS - 3))
+#else
+#define KASAN_SHADOW_SIZE	(0)
+#endif
+
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 9a560b368910..c3c2518eecfe 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -36,19 +36,13 @@
  *
  * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
  *	(rounded up to PUD_SIZE).
- * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_START: beginning of the kernel vmalloc space
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
  *	fixed mappings and modules
  */
 #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
 
-#ifndef CONFIG_KASAN
-#define VMALLOC_START		(VA_START)
-#else
-#include <asm/kasan.h>
-#define VMALLOC_START		(KASAN_SHADOW_END + SZ_64K)
-#endif
-
+#define VMALLOC_START		(MODULES_END)
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define VMEMMAP_START		(VMALLOC_END + SZ_64K)
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0841b2bf0e6a..6be918478f85 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -35,7 +35,9 @@ struct addr_marker {
 };
 
 enum address_markers_idx {
-	VMALLOC_START_NR = 0,
+	MODULES_START_NR = 0,
+	MODULES_END_NR,
+	VMALLOC_START_NR,
 	VMALLOC_END_NR,
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 	VMEMMAP_START_NR,
@@ -45,12 +47,12 @@ enum address_markers_idx {
 	FIXADDR_END_NR,
 	PCI_START_NR,
 	PCI_END_NR,
-	MODULES_START_NR,
-	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 
 static struct addr_marker address_markers[] = {
+	{ MODULES_VADDR,	"Modules start" },
+	{ MODULES_END,		"Modules end" },
 	{ VMALLOC_START,	"vmalloc() Area" },
 	{ VMALLOC_END,		"vmalloc() End" },
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -61,9 +63,7 @@ static struct addr_marker address_markers[] = {
 	{ FIXADDR_TOP,		"Fixmap end" },
 	{ PCI_IO_START,		"PCI I/O start" },
 	{ PCI_IO_END,		"PCI I/O end" },
-	{ MODULES_VADDR,	"Modules start" },
-	{ MODULES_END,		"Modules end" },
-	{ PAGE_OFFSET,		"Kernel Mapping" },
+	{ PAGE_OFFSET,		"Linear Mapping" },
 	{ -1,			NULL },
 };
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index dba32ceff17a..ac4d8159d6f3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@
 #include <linux/swiotlb.h>
 
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -302,22 +303,26 @@ void __init mem_init(void)
 #ifdef CONFIG_KASAN
 		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #endif
+		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
 		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
 		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
 #endif
 		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
 		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 #ifdef CONFIG_KASAN
 		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
 #endif
+		  MLM(MODULES_VADDR, MODULES_END),
 		  MLG(VMALLOC_START, VMALLOC_END),
+		  MLK_ROUNDUP(__init_begin, __init_end),
+		  MLK_ROUNDUP(_text, _etext),
+		  MLK_ROUNDUP(_sdata, _edata),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG(VMEMMAP_START,
 		      VMEMMAP_START + VMEMMAP_SIZE),
@@ -326,11 +331,7 @@ void __init mem_init(void)
 #endif
 		  MLK(FIXADDR_START, FIXADDR_TOP),
 		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(MODULES_VADDR, MODULES_END),
-		  MLM(PAGE_OFFSET, (unsigned long)high_memory),
-		  MLK_ROUNDUP(__init_begin, __init_end),
-		  MLK_ROUNDUP(_text, _etext),
-		  MLK_ROUNDUP(_sdata, _edata));
+		  MLM(PAGE_OFFSET, (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM
@@ -358,8 +359,8 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	fixup_init();
 	free_initmem_default(0);
+	fixup_init();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cc569a38bc76..7f10cc91fa8a 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -17,9 +17,11 @@
 #include <linux/start_kernel.h>
 
 #include <asm/mmu_context.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
@@ -33,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
 	if (pmd_none(*pmd))
 		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
 
-	pte = pte_offset_kernel(pmd, addr);
+	pte = pte_offset_kimg(pmd, addr);
 	do {
 		next = addr + PAGE_SIZE;
 		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
@@ -51,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
 	if (pud_none(*pud))
 		pud_populate(&init_mm, pud, kasan_zero_pmd);
 
-	pmd = pmd_offset(pud, addr);
+	pmd = pmd_offset_kimg(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		kasan_early_pte_populate(pmd, addr, next);
@@ -68,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
 	if (pgd_none(*pgd))
 		pgd_populate(&init_mm, pgd, kasan_zero_pud);
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset_kimg(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		kasan_early_pmd_populate(pud, addr, next);
@@ -126,9 +128,13 @@ static void __init clear_pgds(unsigned long start,
 
 void __init kasan_init(void)
 {
+	u64 kimg_shadow_start, kimg_shadow_end;
 	struct memblock_region *reg;
 	int i;
 
+	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
+	kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
+
 	/*
 	 * We are going to perform proper setup of shadow memory.
 	 * At first we should unmap early shadow (clear_pgds() call bellow).
@@ -142,8 +148,23 @@ void __init kasan_init(void)
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
+	vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE);
+
+	/*
+	 * vmemmap_populate() has populated the shadow region that covers the
+	 * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
+	 * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
+	 * kasan_populate_zero_shadow() from replacing the PMD block mappings
+	 * with PMD table mappings at the edges of the shadow region for the
+	 * kernel image.
+	 */
+	if (ARM64_SWAPPER_USES_SECTION_MAPS)
+		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 			kasan_mem_to_shadow((void *)MODULES_VADDR));
+	kasan_populate_zero_shadow((void *)kimg_shadow_end,
+			kasan_mem_to_shadow((void *)PAGE_OFFSET));
 
 	for_each_memblock(memory, reg) {
 		void *start = (void *)__phys_to_virt(reg->base);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0f58a45df1f3..895a8457259c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -53,6 +53,10 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
@@ -380,16 +384,15 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-
 	unsigned long kernel_start = __pa(_stext);
-	unsigned long kernel_end = __pa(_end);
+	unsigned long kernel_end = __pa(_etext);
 
 	/*
-	 * The kernel itself is mapped at page granularity. Map all other
-	 * memory, making sure we don't overwrite the existing kernel mappings.
+	 * Take care not to create a writable alias for the
+	 * read-only text and rodata sections of the kernel image.
 	 */
 
-	/* No overlap with the kernel. */
+	/* No overlap with the kernel text */
 	if (end < kernel_start || start >= kernel_end) {
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     end - start, PAGE_KERNEL,
@@ -398,8 +401,8 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 	}
 
 	/*
-	 * This block overlaps the kernel mapping. Map the portion(s) which
-	 * don't overlap.
+	 * This block overlaps the kernel text mapping.
+	 * Map the portion(s) which don't overlap.
 	 */
 	if (start < kernel_start)
 		__create_pgd_mapping(pgd, start,
@@ -411,6 +414,16 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 				     __phys_to_virt(kernel_end),
 				     end - kernel_end, PAGE_KERNEL,
 				     early_pgtable_alloc);
+
+	/*
+	 * Map the linear alias of the [_stext, _etext) interval as
+	 * read-only/non-executable. This makes the contents of the
+	 * region accessible to subsystems such as hibernate, but
+	 * protects it from inadvertent modification or execution.
+	 */
+	__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
+			     kernel_end - kernel_start, PAGE_KERNEL_RO,
+			     early_pgtable_alloc);
 }
 
 static void __init map_mem(pgd_t *pgd)
@@ -429,25 +442,28 @@ static void __init map_mem(pgd_t *pgd)
 	}
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void)
 {
+	if (!IS_ENABLED(CONFIG_DEBUG_RODATA))
+		return;
+
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
 				(unsigned long)_etext - (unsigned long)_stext,
 				PAGE_KERNEL_ROX);
-
 }
-#endif
 
 void fixup_init(void)
 {
-	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
-			(unsigned long)__init_end - (unsigned long)__init_begin,
-			PAGE_KERNEL);
+	/*
+	 * Unmap the __init region but leave the VM area in place. This
+	 * prevents the region from being reused for kernel modules, which
+	 * is not supported by kallsyms.
+	 */
+	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
 static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
-				    pgprot_t prot)
+				    pgprot_t prot, struct vm_struct *vma)
 {
 	phys_addr_t pa_start = __pa(va_start);
 	unsigned long size = va_end - va_start;
@@ -457,6 +473,14 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
 
 	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
 			     early_pgtable_alloc);
+
+	vma->addr	= va_start;
+	vma->phys_addr	= pa_start;
+	vma->size	= size;
+	vma->flags	= VM_MAP;
+	vma->caller	= __builtin_return_address(0);
+
+	vm_area_add_early(vma);
 }
 
 /*
@@ -464,17 +488,35 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
  */
 static void __init map_kernel(pgd_t *pgd)
 {
+	static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data;
 
-	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);
-	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);
-	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);
+	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+			 &vmlinux_init);
+	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
 
-	/*
-	 * The fixmap falls in a separate pgd to the kernel, and doesn't live
-	 * in the carveout for the swapper_pg_dir. We can simply re-use the
-	 * existing dir for the fixmap.
-	 */
-	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));
+	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+		/*
+		 * The fixmap falls in a separate pgd to the kernel, and doesn't
+		 * live in the carveout for the swapper_pg_dir. We can simply
+		 * re-use the existing dir for the fixmap.
+		 */
+		set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+			*pgd_offset_k(FIXADDR_START));
+	} else if (CONFIG_PGTABLE_LEVELS > 3) {
+		/*
+		 * The fixmap shares its top level pgd entry with the kernel
+		 * mapping. This can really only occur when we are running
+		 * with 16k/4 levels, so we can simply reuse the pud level
+		 * entry instead.
+		 */
+		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+			__pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+		pud_clear_fixmap();
+	} else {
+		BUG();
+	}
 
 	kasan_copy_shadow(pgd);
 }
@@ -600,14 +642,6 @@ void vmemmap_free(unsigned long start, unsigned long end)
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
@@ -639,8 +673,18 @@ void __init early_fixmap_init(void)
 	unsigned long addr = FIXADDR_START;
 
 	pgd = pgd_offset_k(addr);
-	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = fixmap_pud(addr);
+	if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) {
+		/*
+		 * We only end up here if the kernel mapping and the fixmap
+		 * share the top level pgd entry, which should only happen on
+		 * 16k/4 levels configurations.
+		 */
+		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+		pud = pud_offset_kimg(pgd, addr);
+	} else {
+		pgd_populate(&init_mm, pgd, bm_pud);
+		pud = fixmap_pud(addr);
+	}
 	pud_populate(&init_mm, pud, bm_pmd);
 	pmd = fixmap_pmd(addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);

From 368a063148f5d4e5b40e65b9954a9b98b1a7cc3c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:41 +0100
Subject: [PATCH 767/797] arm64: defer __va translation of initrd_start and
 initrd_end

Before deferring the assignment of memstart_addr in a subsequent patch, to
the moment where all memory has been discovered and possibly clipped based
on the size of the linear region and the presence of a mem= command line
parameter, we need to ensure that memstart_addr is not used to perform __va
translations before it is assigned.

One such use is in the generic early DT discovery of the initrd location,
which is recorded as a virtual address in the globals initrd_start and
initrd_end. So wire up the generic support to declare the initrd addresses,
and implement it without __va() translations, and perform the translation
after memstart_addr has been assigned.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a89dea585371a9d5d85499db47c93f129be8e0c4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h |  8 ++++++++
 arch/arm64/mm/init.c            | 13 +++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 4388651d1f0d..18b7e77c7495 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -121,6 +121,14 @@
 #define IOREMAP_MAX_ORDER	(PMD_SHIFT)
 #endif
 
+#ifdef CONFIG_BLK_DEV_INITRD
+#define __early_init_dt_declare_initrd(__start, __end)			\
+	do {								\
+		initrd_start = (__start);				\
+		initrd_end = (__end);					\
+	} while (0)
+#endif
+
 #ifndef __ASSEMBLY__
 
 extern phys_addr_t		memstart_addr;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ac4d8159d6f3..92acbee2bb8b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -59,8 +59,8 @@ static int __init early_initrd(char *p)
 	if (*endp == ',') {
 		size = memparse(endp + 1, NULL);
 
-		initrd_start = (unsigned long)__va(start);
-		initrd_end = (unsigned long)__va(start + size);
+		initrd_start = start;
+		initrd_end = start + size;
 	}
 	return 0;
 }
@@ -168,8 +168,13 @@ void __init arm64_memblock_init(void)
 	 */
 	memblock_reserve(__pa(_text), _end - _text);
 #ifdef CONFIG_BLK_DEV_INITRD
-	if (initrd_start)
-		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
+	if (initrd_start) {
+		memblock_reserve(initrd_start, initrd_end - initrd_start);
+
+		/* the generic initrd code expects virtual addresses */
+		initrd_start = __phys_to_virt(initrd_start);
+		initrd_end = __phys_to_virt(initrd_end);
+	}
 #endif
 
 	early_init_fdt_scan_reserved_mem();

From 72b991537db5f4c361b540cbf0059c7268d848c4 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:42 +0100
Subject: [PATCH 768/797] arm64: allow kernel Image to be loaded anywhere in
 physical memory

This relaxes the kernel Image placement requirements, so that it
may be placed at any 2 MB aligned offset in physical memory.

This is accomplished by ignoring PHYS_OFFSET when installing
memblocks, and accounting for the apparent virtual offset of
the kernel Image. As a result, virtual address references
below PAGE_OFFSET are correctly mapped onto physical references
into the kernel Image regardless of where it sits in memory.

Special care needs to be taken for dealing with memory limits passed
via mem=, since the generic implementation clips memory top down, which
may clip the kernel image itself if it is loaded high up in memory. To
deal with this case, we simply add back the memory covering the kernel
image, which may result in more memory being retained than was passed
as a mem= parameter.

Since mem= should not be considered a production feature, a panic notifier
handler is installed that dumps the memory limit at panic time if one was
set.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a7f8de168ace487fa7b88cb154e413cf40e87fc6)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/arm64/booting.txt         | 20 +++++---
 arch/arm64/include/asm/boot.h           |  6 +++
 arch/arm64/include/asm/kernel-pgtable.h | 12 +++++
 arch/arm64/include/asm/kvm_asm.h        | 17 +------
 arch/arm64/include/asm/memory.h         | 18 +++----
 arch/arm64/kernel/head.S                |  6 ++-
 arch/arm64/kernel/image.h               | 13 +++--
 arch/arm64/mm/init.c                    | 63 ++++++++++++++++++++++++-
 arch/arm64/mm/mmu.c                     |  3 ++
 9 files changed, 119 insertions(+), 39 deletions(-)

diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 701d39d3171a..56d6d8b796db 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -109,7 +109,13 @@ Header notes:
 			1 - 4K
 			2 - 16K
 			3 - 64K
-  Bits 3-63:	Reserved.
+  Bit 3:	Kernel physical placement
+			0 - 2MB aligned base should be as close as possible
+			    to the base of DRAM, since memory below it is not
+			    accessible via the linear mapping
+			1 - 2MB aligned base may be anywhere in physical
+			    memory
+  Bits 4-63:	Reserved.
 
 - When image_size is zero, a bootloader should attempt to keep as much
   memory as possible free for use by the kernel immediately after the
@@ -117,14 +123,14 @@ Header notes:
   depending on selected features, and is effectively unbound.
 
 The Image must be placed text_offset bytes from a 2MB aligned base
-address near the start of usable system RAM and called there. Memory
-below that base address is currently unusable by Linux, and therefore it
-is strongly recommended that this location is the start of system RAM.
-The region between the 2 MB aligned base address and the start of the
-image has no special significance to the kernel, and may be used for
-other purposes.
+address anywhere in usable system RAM and called there. The region
+between the 2 MB aligned base address and the start of the image has no
+special significance to the kernel, and may be used for other purposes.
 At least image_size bytes from the start of the image must be free for
 use by the kernel.
+NOTE: versions prior to v4.6 cannot make use of memory below the
+physical offset of the Image so it is recommended that the Image be
+placed as close as possible to the start of system RAM.
 
 Any memory described to the kernel (even that below the start of the
 image) which is not marked as reserved from the kernel (e.g., with a
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index 81151b67b26b..ebf2481889c3 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -11,4 +11,10 @@
 #define MIN_FDT_ALIGN		8
 #define MAX_FDT_SIZE		SZ_2M
 
+/*
+ * arm64 requires the kernel image to placed
+ * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ */
+#define MIN_KIMG_ALIGN		SZ_2M
+
 #endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index a459714ee29e..5c6375d8528b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -79,5 +79,17 @@
 #define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
 #endif
 
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define ARM64_MEMSTART_ALIGN	SZ_512M
+#else
+#define ARM64_MEMSTART_ALIGN	SZ_1G
+#endif
 
 #endif	/* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index e95c39543629..419bc6661b5c 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -102,24 +102,9 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
-#define kvm_ksym_ref(sym)		((void *)&sym + kvm_ksym_shift)
+#define kvm_ksym_ref(sym)		phys_to_virt((u64)&sym - kimage_voffset)
 
 #ifndef __ASSEMBLY__
-#if __GNUC__ > 4
-#define kvm_ksym_shift			(PAGE_OFFSET - KIMAGE_VADDR)
-#else
-/*
- * GCC versions 4.9 and older will fold the constant below into the addend of
- * the reference to 'sym' above if kvm_ksym_shift is declared static or if the
- * constant is used directly. However, since we use the small code model for
- * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair,
- * with a +/- 4 GB range, resulting in linker relocation errors if the shift
- * is sufficiently large. So prevent the compiler from folding the shift into
- * the addend, by making the shift a variable with external linkage.
- */
-__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR;
-#endif
-
 struct kvm;
 struct kvm_vcpu;
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 18b7e77c7495..3239e4d78e0d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
+#include <asm/bug.h>
 #include <asm/sizes.h>
 
 /*
@@ -88,10 +89,10 @@
 #define __virt_to_phys(x) ({						\
 	phys_addr_t __x = (phys_addr_t)(x);				\
 	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
-			     (__x - KIMAGE_VADDR + PHYS_OFFSET); })
+			     (__x - kimage_voffset); })
 
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
-#define __phys_to_kimg(x)	((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR))
+#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
 
 /*
  * Convert a page to/from a physical address
@@ -133,15 +134,16 @@
 
 extern phys_addr_t		memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET		({ memstart_addr; })
+#define PHYS_OFFSET		({ BUG_ON(memstart_addr & 1); memstart_addr; })
+
+/* the offset between the kernel virtual and physical mappings */
+extern u64			kimage_voffset;
 
 /*
- * The maximum physical address that the linear direct mapping
- * of system RAM can cover. (PAGE_OFFSET can be interpreted as
- * a 2's complement signed quantity and negated to derive the
- * maximum size of the linear mapping.)
+ * Allow all memory at the discovery stage. We will clip it later.
  */
-#define MAX_MEMBLOCK_ADDR	({ memstart_addr - PAGE_OFFSET - 1; })
+#define MIN_MEMBLOCK_ADDR	0
+#define MAX_MEMBLOCK_ADDR	U64_MAX
 
 /*
  * PFNs are used to describe any physical page; this means
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 04d38a058b19..05b98289093e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -428,7 +428,11 @@ __mmap_switched:
 	and	x4, x4, #~(THREAD_SIZE - 1)
 	msr	sp_el0, x4			// Save thread_info
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
-	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
+
+	ldr	x4, =KIMAGE_VADDR		// Save the offset between
+	sub	x4, x4, x24			// the kernel virtual and
+	str_l	x4, kimage_voffset, x5		// physical mappings
+
 	mov	x29, #0
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 999633bd7294..c9c62cab25a4 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -42,15 +42,18 @@
 #endif
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE	1
+#define __HEAD_FLAG_BE		1
 #else
-#define __HEAD_FLAG_BE	0
+#define __HEAD_FLAG_BE		0
 #endif
 
-#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+#define __HEAD_FLAG_PAGE_SIZE	((PAGE_SHIFT - 10) / 2)
 
-#define __HEAD_FLAGS	((__HEAD_FLAG_BE << 0) |	\
-			 (__HEAD_FLAG_PAGE_SIZE << 1))
+#define __HEAD_FLAG_PHYS_BASE	1
+
+#define __HEAD_FLAGS		((__HEAD_FLAG_BE << 0) |	\
+				 (__HEAD_FLAG_PAGE_SIZE << 1) |	\
+				 (__HEAD_FLAG_PHYS_BASE << 3))
 
 /*
  * These will output as part of the Image header, which should be little-endian
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 92acbee2bb8b..2c7a3c2868e4 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,8 +35,10 @@
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
 
+#include <asm/boot.h>
 #include <asm/fixmap.h>
 #include <asm/kasan.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -46,7 +48,13 @@
 
 #include "mm.h"
 
-phys_addr_t memstart_addr __read_mostly = 0;
+/*
+ * We need to be able to catch inadvertent references to memstart_addr
+ * that occur (potentially in generic code) before arm64_memblock_init()
+ * executes, which assigns it its actual value. So use a default value
+ * that cannot be mistaken for a real physical address.
+ */
+phys_addr_t memstart_addr __read_mostly = ~0ULL;
 phys_addr_t arm64_dma_phys_limit __read_mostly;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -160,7 +168,33 @@ early_param("mem", early_mem);
 
 void __init arm64_memblock_init(void)
 {
-	memblock_enforce_memory_limit(memory_limit);
+	const s64 linear_region_size = -(s64)PAGE_OFFSET;
+
+	/*
+	 * Select a suitable value for the base of physical memory.
+	 */
+	memstart_addr = round_down(memblock_start_of_DRAM(),
+				   ARM64_MEMSTART_ALIGN);
+
+	/*
+	 * Remove the memory that we will not be able to cover with the
+	 * linear mapping. Take care not to clip the kernel which may be
+	 * high in memory.
+	 */
+	memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)),
+			ULLONG_MAX);
+	if (memblock_end_of_DRAM() > linear_region_size)
+		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+
+	/*
+	 * Apply the memory limit if it was set. Since the kernel may be loaded
+	 * high up in memory, add back the kernel region that must be accessible
+	 * via the linear mapping.
+	 */
+	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+		memblock_enforce_memory_limit(memory_limit);
+		memblock_add(__pa(_text), (u64)(_end - _text));
+	}
 
 	/*
 	 * Register the kernel text, kernel data, initrd, and initial
@@ -386,3 +420,28 @@ static int __init keepinitrd_setup(char *__unused)
 
 __setup("keepinitrd", keepinitrd_setup);
 #endif
+
+/*
+ * Dump out memory limit information on panic.
+ */
+static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+{
+	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+		pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
+	} else {
+		pr_emerg("Memory Limit: none\n");
+	}
+	return 0;
+}
+
+static struct notifier_block mem_limit_notifier = {
+	.notifier_call = dump_mem_limit,
+};
+
+static int __init register_mem_limit_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &mem_limit_notifier);
+	return 0;
+}
+__initcall(register_mem_limit_dumper);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 895a8457259c..fb5c872fe3d6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -46,6 +46,9 @@
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 
+u64 kimage_voffset __read_mostly;
+EXPORT_SYMBOL(kimage_voffset);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.

From a67099df67dec4550a650aa3b871e2b0ecd20957 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 19 Feb 2016 14:28:58 +0000
Subject: [PATCH 769/797] arm64: Use die() instead of panic() in
 do_page_fault()

The former gives better error reporting on unhandled permission faults
(introduced by the UAO patches).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 70c8abc28762d04e36c92e07eee2ce6ab41049cb)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a8eafeceb08a..44e56de23f79 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -235,10 +235,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 
 	if (permission_fault(esr) && (addr < USER_DS)) {
 		if (get_fs() == KERNEL_DS)
-			panic("Accessing user space memory with fs=KERNEL_DS");
+			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
 
 		if (!search_exception_tables(regs->pc))
-			panic("Accessing user space memory outside uaccess.h routines");
+			die("Accessing user space memory outside uaccess.h routines", regs, esr);
 	}
 
 	/*

From bf7cb966b2f5ef5a456a25d84d2b64c79730a07a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 22 Feb 2016 18:46:03 +0100
Subject: [PATCH 770/797] arm64: mm: only perform memstart_addr sanity check if
 DEBUG_VM

Checking whether memstart_addr has been assigned every time it is
referenced adds a branch instruction that may hurt performance if
the reference in question occurs on a hot path. So only perform the
check if CONFIG_DEBUG_VM=y.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[catalin.marinas@arm.com: replaced #ifdef with VM_BUG_ON]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit a92405f082d43267575444a6927085e4c8a69e4e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 3239e4d78e0d..460d09bf9442 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -132,9 +132,11 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/mmdebug.h>
+
 extern phys_addr_t		memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET		({ BUG_ON(memstart_addr & 1); memstart_addr; })
+#define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
 /* the offset between the kernel virtual and physical mappings */
 extern u64			kimage_voffset;

From d3bb0180b3ecbcff076ca7e41d4ad4fa0ee4c9d7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 22 Feb 2016 18:46:04 +0100
Subject: [PATCH 771/797] arm64: mm: use bit ops rather than arithmetic in
 pa/va translations

Since PAGE_OFFSET is chosen such that it cuts the kernel VA space right
in half, and since the size of the kernel VA space itself is always a
power of 2, we can treat PAGE_OFFSET as a bitmask and replace the
additions/subtractions with 'or' and 'and-not' operations.

For the comparison against PAGE_OFFSET, a mov/cmp/branch sequence ends
up getting replaced with a single tbz instruction. For the additions and
subtractions, we save a mov instruction since the mask is folded into the
instruction's immediate field.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 8439e62a15614e8fcd43835d57b7245cd9870dc5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 460d09bf9442..eb798156cf56 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -88,10 +88,10 @@
  */
 #define __virt_to_phys(x) ({						\
 	phys_addr_t __x = (phys_addr_t)(x);				\
-	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
-			     (__x - kimage_voffset); })
+	__x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :	\
+				 (__x - kimage_voffset); })
 
-#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
 #define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
 
 /*
@@ -132,6 +132,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitops.h>
 #include <linux/mmdebug.h>
 
 extern phys_addr_t		memstart_addr;

From 11e7d3ccfae5510815c72cec4066d5d5acbfa718 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 23 Feb 2016 08:56:45 +0100
Subject: [PATCH 772/797] arm64: move brk immediate argument definitions to
 separate header

Instead of reversing the header dependency between asm/bug.h and
asm/debug-monitors.h, split off the brk instruction immediate value
defines into a new header asm/brk-imm.h, and include it from both.

This solves the circular dependency issue that prevents BUG() from
being used in some header files, and keeps the definitions together.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f98deee9a9f8c47d05a0f64d86440882dca772ff)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/brk-imm.h        | 25 +++++++++++++++++++++++++
 arch/arm64/include/asm/bug.h            |  2 +-
 arch/arm64/include/asm/debug-monitors.h | 14 +-------------
 3 files changed, 27 insertions(+), 14 deletions(-)
 create mode 100644 arch/arm64/include/asm/brk-imm.h

diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
new file mode 100644
index 000000000000..ed693c5bcec0
--- /dev/null
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BRK_IMM_H
+#define __ASM_BRK_IMM_H
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ * 0x800: kernel-mode BUG() and WARN() traps
+ */
+#define FAULT_BRK_IMM			0x100
+#define KGDB_DYN_DBG_BRK_IMM		0x400
+#define KGDB_COMPILED_DBG_BRK_IMM	0x401
+#define BUG_BRK_IMM			0x800
+
+#endif
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 679d49221998..561190d15881 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,7 +18,7 @@
 #ifndef _ARCH_ARM64_ASM_BUG_H
 #define _ARCH_ARM64_ASM_BUG_H
 
-#define BUG_BRK_IMM			0x800
+#include <asm/brk-imm.h>
 
 #ifdef CONFIG_GENERIC_BUG
 #define HAVE_ARCH_BUG
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index e893a1fca9c2..2fcb9b7c876c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,7 +20,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <asm/bug.h>
+#include <asm/brk-imm.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
 #include <asm/ptrace.h>
@@ -47,18 +47,6 @@
  */
 #define BREAK_INSTR_SIZE		AARCH64_INSN_SIZE
 
-/*
- * #imm16 values used for BRK instruction generation
- * Allowed values for kgbd are 0x400 - 0x7ff
- * 0x100: for triggering a fault on purpose (reserved)
- * 0x400: for dynamic BRK instruction
- * 0x401: for compile time BRK instruction
- * 0x800: kernel-mode BUG() and WARN() traps
- */
-#define FAULT_BRK_IMM			0x100
-#define KGDB_DYN_DBG_BRK_IMM		0x400
-#define KGDB_COMPILED_DBG_BRK_IMM	0x401
-
 /*
  * BRK instruction encoding
  * The #imm16 value should be placed at bits[20:5] within BRK ins

From 6537906675622c231257664593ed3174b117b3ef Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 24 Nov 2015 12:37:35 +0100
Subject: [PATCH 773/797] arm64: add support for module PLTs

This adds support for emitting PLTs at module load time for relative
branches that are out of range. This is a prerequisite for KASLR, which
may place the kernel and the modules anywhere in the vmalloc area,
making it more likely that branch target offsets exceed the maximum
range of +/- 128 MB.

In this version, I removed the distinction between relocations against
.init executable sections and ordinary executable sections. The reason
is that it is hardly worth the trouble, given that .init.text usually
does not contain that many far branches, and this version now only
reserves PLT entry space for jump and call relocations against undefined
symbols (since symbols defined in the same module can be assumed to be
within +/- 128 MB).

For example, the mac80211.ko module (which is fairly sizable at ~400 KB)
built with -mcmodel=large gives the following relocation counts:

                    relocs    branches   unique     !local
  .text              3925       3347       518        219
  .init.text           11          8         7          1
  .exit.text            4          4         4          1
  .text.unlikely       81         67        36         17

('unique' means branches to unique type/symbol/addend combos, of which
!local is the subset referring to undefined symbols)

IOW, we are only emitting a single PLT entry for the .init sections, and
we are better off just adding it to the core PLT section instead.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit fd045f6cd98ec4953147b318418bd45e441e52a3)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig              |   9 ++
 arch/arm64/Makefile             |   6 +-
 arch/arm64/include/asm/module.h |  11 ++
 arch/arm64/kernel/Makefile      |   1 +
 arch/arm64/kernel/module-plts.c | 201 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/module.c      |  22 ++++
 arch/arm64/kernel/module.lds    |   3 +
 7 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/module-plts.c
 create mode 100644 arch/arm64/kernel/module.lds

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cd8d06ece4a..22db20491733 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -365,6 +365,7 @@ config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
 	depends on MODULES
 	default y
+	select ARM64_MODULE_CMODEL_LARGE
 	help
 	  This option builds kernel modules using the large memory model in
 	  order to avoid the use of the ADRP instruction, which can cause
@@ -728,6 +729,14 @@ config ARM64_UAO
 	  regular load/store instructions if the cpu does not implement the
 	  feature.
 
+config ARM64_MODULE_CMODEL_LARGE
+	bool
+
+config ARM64_MODULE_PLTS
+	bool
+	select ARM64_MODULE_CMODEL_LARGE
+	select HAVE_MOD_ARCH_SPECIFIC
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 548a2939d7e6..71054a38decf 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -43,10 +43,14 @@ endif
 
 CHECKFLAGS	+= -D__aarch64__
 
-ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
 KBUILD_CFLAGS_MODULE	+= -mcmodel=large
 endif
 
+ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
+KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o
 
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index e80e232b730e..8652fb613304 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -20,4 +20,15 @@
 
 #define MODULE_ARCH_VERMAGIC	"aarch64"
 
+#ifdef CONFIG_ARM64_MODULE_PLTS
+struct mod_arch_specific {
+	struct elf64_shdr	*plt;
+	int			plt_num_entries;
+	int			plt_max_entries;
+};
+#endif
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+			  Elf64_Sym *sym);
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index c4e2f70c0aa0..8d971f9c6ed5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   ../../arm/kernel/opcodes.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
+arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o
 arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o perf_callchain.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
new file mode 100644
index 000000000000..1ce90d8450ae
--- /dev/null
+++ b/arch/arm64/kernel/module-plts.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+
+struct plt_entry {
+	/*
+	 * A program that conforms to the AArch64 Procedure Call Standard
+	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+	 * IP1 (x17) may be inserted at any branch instruction that is
+	 * exposed to a relocation that supports long branches. Since that
+	 * is exactly what we are dealing with here, we are free to use x16
+	 * as a scratch register in the PLT veneers.
+	 */
+	__le32	mov0;	/* movn	x16, #0x....			*/
+	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
+	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	br;	/* br	x16				*/
+};
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+			  Elf64_Sym *sym)
+{
+	struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
+	int i = mod->arch.plt_num_entries;
+	u64 val = sym->st_value + rela->r_addend;
+
+	/*
+	 * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
+	 * which are listed in the ELF symtab section, but without a type
+	 * or a size.
+	 * So, similar to how the module loader uses the Elf64_Sym::st_value
+	 * field to store the resolved addresses of undefined symbols, let's
+	 * borrow the Elf64_Sym::st_size field (whose value is never used by
+	 * the module loader, even for symbols that are defined) to record
+	 * the address of a symbol's associated PLT entry as we emit it for a
+	 * zero addend relocation (which is the only kind we have to deal with
+	 * in practice). This allows us to find duplicates without having to
+	 * go through the table every time.
+	 */
+	if (rela->r_addend == 0 && sym->st_size != 0) {
+		BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
+		return sym->st_size;
+	}
+
+	mod->arch.plt_num_entries++;
+	BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
+
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
+	 * Rd     := 0x10 (x16)
+	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf     := 1 (64-bit variant)
+	 */
+	plt[i] = (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+
+	if (rela->r_addend == 0)
+		sym->st_size = (u64)&plt[i];
+
+	return (u64)&plt[i];
+}
+
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+	const Elf64_Rela *x = a, *y = b;
+	int i;
+
+	/* sort by type, symbol index and addend */
+	i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+	if (i == 0)
+		i = cmp_3way(x->r_addend, y->r_addend);
+	return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+	/*
+	 * Entries are sorted by type, symbol index and addend. That means
+	 * that, if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+	unsigned int ret = 0;
+	Elf64_Sym *s;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_AARCH64_JUMP26:
+		case R_AARCH64_CALL26:
+			/*
+			 * We only have to consider branch targets that resolve
+			 * to undefined symbols. This is not simply a heuristic,
+			 * it is a fundamental limitation, since the PLT itself
+			 * is part of the module, and needs to be within 128 MB
+			 * as well, so modules can never grow beyond that limit.
+			 */
+			s = syms + ELF64_R_SYM(rela[i].r_info);
+			if (s->st_shndx != SHN_UNDEF)
+				break;
+
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero: this allows us to record the PLT
+			 * entry address in the symbol table itself, rather than
+			 * having to search the list for duplicates each time we
+			 * emit one.
+			 */
+			if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *mod)
+{
+	unsigned long plt_max_entries = 0;
+	Elf64_Sym *syms = NULL;
+	int i;
+
+	/*
+	 * Find the empty .plt section so we can expand it to store the PLT
+	 * entries. Record the symtab address as well.
+	 */
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
+			mod->arch.plt = sechdrs + i;
+		else if (sechdrs[i].sh_type == SHT_SYMTAB)
+			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
+	}
+
+	if (!mod->arch.plt) {
+		pr_err("%s: module PLT section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!syms) {
+		pr_err("%s: module symtab section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+		int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+		Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
+
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dstsec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		/* sort by type, symbol index and addend */
+		sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+		plt_max_entries += count_plts(syms, rels, numrels);
+	}
+
+	mod->arch.plt->sh_type = SHT_NOBITS;
+	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
+	mod->arch.plt_num_entries = 0;
+	mod->arch.plt_max_entries = plt_max_entries;
+	return 0;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 93e970231ca9..a9dde97f5ca5 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -38,6 +38,21 @@ void *module_alloc(unsigned long size)
 				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
+	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+	    !IS_ENABLED(CONFIG_KASAN))
+		/*
+		 * KASAN can only deal with module allocations being served
+		 * from the reserved module region, since the remainder of
+		 * the vmalloc region is already backed by zero shadow pages,
+		 * and punching holes into it is non-trivial. Since the module
+		 * region is not randomized when KASAN is enabled, it is even
+		 * less likely that the module region gets exhausted, so we
+		 * can simply omit this fallback in that case.
+		 */
+		p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
+				VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				NUMA_NO_NODE, __builtin_return_address(0));
+
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
 		return NULL;
@@ -361,6 +376,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_CALL26:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
 					     AARCH64_INSN_IMM_26);
+
+			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+			    ovf == -ERANGE) {
+				val = module_emit_plt_entry(me, &rel[i], sym);
+				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
+						     26, AARCH64_INSN_IMM_26);
+			}
 			break;
 
 		default:
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
new file mode 100644
index 000000000000..8949f6c6f729
--- /dev/null
+++ b/arch/arm64/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+	.plt (NOLOAD) : { BYTE(0) }
+}

From 9bd7f88a1dd7b6c513b6f7cf39154e69d0cbf62e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 26 Dec 2015 13:48:02 +0100
Subject: [PATCH 774/797] arm64: avoid R_AARCH64_ABS64 relocations for Image
 header fields

Unfortunately, the current way of using the linker to emit build time
constants into the Image header will no longer work once we switch to
the use of PIE executables. The reason is that such constants are emitted
into the binary using R_AARCH64_ABS64 relocations, which are resolved at
runtime, not at build time, and the places targeted by those relocations
will contain zeroes before that.

So refactor the endian swapping linker script constant generation code so
that it emits the upper and lower 32-bit words separately.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6ad1fe5d9077a1ab40bf74b61994d2e770b00b14)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/assembler.h | 11 ++++++++++
 arch/arm64/kernel/head.S           |  6 +++---
 arch/arm64/kernel/image.h          | 32 ++++++++++++++++++------------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bb7b72734c24..ba5aff6c830e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -215,4 +215,15 @@ lr	.req	x30		// link register
 	.size	__pi_##x, . - x;	\
 	ENDPROC(x)
 
+	/*
+	 * Emit a 64-bit absolute little endian symbol reference in a way that
+	 * ensures that it will be resolved at build time, even when building a
+	 * PIE binary. This requires cooperation from the linker script, which
+	 * must emit the lo32/hi32 halves individually.
+	 */
+	.macro	le64sym, sym
+	.long	\sym\()_lo32
+	.long	\sym\()_hi32
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 05b98289093e..f076debf392d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -83,9 +83,9 @@ efi_head:
 	b	stext				// branch to kernel start, magic
 	.long	0				// reserved
 #endif
-	.quad	_kernel_offset_le		// Image load offset from start of RAM, little-endian
-	.quad	_kernel_size_le			// Effective size of kernel image, little-endian
-	.quad	_kernel_flags_le		// Informative flags, little-endian
+	le64sym	_kernel_offset_le		// Image load offset from start of RAM, little-endian
+	le64sym	_kernel_size_le			// Effective size of kernel image, little-endian
+	le64sym	_kernel_flags_le		// Informative flags, little-endian
 	.quad	0				// reserved
 	.quad	0				// reserved
 	.quad	0				// reserved
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index c9c62cab25a4..db1bf57948f1 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -26,21 +26,27 @@
  * There aren't any ELF relocations we can use to endian-swap values known only
  * at link time (e.g. the subtraction of two symbol addresses), so we must get
  * the linker to endian-swap certain values before emitting them.
+ *
+ * Note that, in order for this to work when building the ELF64 PIE executable
+ * (for KASLR), these values should not be referenced via R_AARCH64_ABS64
+ * relocations, since these are fixed up at runtime rather than at build time
+ * when PIE is in effect. So we need to split them up into 32-bit high and low
+ * words.
  */
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define DATA_LE64(data)					\
-	((((data) & 0x00000000000000ff) << 56) |	\
-	 (((data) & 0x000000000000ff00) << 40) |	\
-	 (((data) & 0x0000000000ff0000) << 24) |	\
-	 (((data) & 0x00000000ff000000) << 8)  |	\
-	 (((data) & 0x000000ff00000000) >> 8)  |	\
-	 (((data) & 0x0000ff0000000000) >> 24) |	\
-	 (((data) & 0x00ff000000000000) >> 40) |	\
-	 (((data) & 0xff00000000000000) >> 56))
+#define DATA_LE32(data)				\
+	((((data) & 0x000000ff) << 24) |	\
+	 (((data) & 0x0000ff00) << 8)  |	\
+	 (((data) & 0x00ff0000) >> 8)  |	\
+	 (((data) & 0xff000000) >> 24))
 #else
-#define DATA_LE64(data) ((data) & 0xffffffffffffffff)
+#define DATA_LE32(data) ((data) & 0xffffffff)
 #endif
 
+#define DEFINE_IMAGE_LE64(sym, data)				\
+	sym##_lo32 = DATA_LE32((data) & 0xffffffff);		\
+	sym##_hi32 = DATA_LE32((data) >> 32)
+
 #ifdef CONFIG_CPU_BIG_ENDIAN
 #define __HEAD_FLAG_BE		1
 #else
@@ -61,9 +67,9 @@
  * endian swapped in head.S, all are done here for consistency.
  */
 #define HEAD_SYMBOLS						\
-	_kernel_size_le		= DATA_LE64(_end - _text);	\
-	_kernel_offset_le	= DATA_LE64(TEXT_OFFSET);	\
-	_kernel_flags_le	= DATA_LE64(__HEAD_FLAGS);
+	DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text);	\
+	DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET);	\
+	DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
 
 #ifdef CONFIG_EFI
 

From 63f9fbe469f17deee50491bc293bb3d2843f3e4a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 26 Dec 2015 12:46:40 +0100
Subject: [PATCH 775/797] arm64: avoid dynamic relocations in early boot code

Before implementing KASLR for arm64 by building a self-relocating PIE
executable, we have to ensure that values we use before the relocation
routine is executed are not subject to dynamic relocation themselves.
This applies not only to virtual addresses, but also to values that are
supplied by the linker at build time and relocated using R_AARCH64_ABS64
relocations.

So instead, use assembly-time constants, or force the use of static
relocations by folding the constants into the instructions.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 2bf31a4a05f5b00f37d65ba029d36a0230286cb7)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/efi-entry.S |  2 +-
 arch/arm64/kernel/head.S      | 39 +++++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index a773db92908b..f82036e02485 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,7 @@ ENTRY(entry)
 	 */
 	mov	x20, x0		// DTB address
 	ldr	x0, [sp, #16]	// relocated _text address
-	ldr	x21, =stext_offset
+	movz	x21, #:abs_g0:stext_offset
 	add	x21, x0, x21
 
 	/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index f076debf392d..4cad8f9f2268 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -67,12 +67,11 @@
  * in the entry routines.
  */
 	__HEAD
-
+_head:
 	/*
 	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
 	 */
 #ifdef CONFIG_EFI
-efi_head:
 	/*
 	 * This add instruction has no meaningful effect except that
 	 * its opcode forms the magic "MZ" signature required by UEFI.
@@ -94,14 +93,14 @@ efi_head:
 	.byte	0x4d
 	.byte	0x64
 #ifdef CONFIG_EFI
-	.long	pe_header - efi_head		// Offset to the PE header.
+	.long	pe_header - _head		// Offset to the PE header.
 #else
 	.word	0				// reserved
 #endif
 
 #ifdef CONFIG_EFI
 	.globl	__efistub_stext_offset
-	.set	__efistub_stext_offset, stext - efi_head
+	.set	__efistub_stext_offset, stext - _head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -124,7 +123,7 @@ optional_header:
 	.long	_end - stext			// SizeOfCode
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
-	.long	__efistub_entry - efi_head	// AddressOfEntryPoint
+	.long	__efistub_entry - _head		// AddressOfEntryPoint
 	.long	__efistub_stext_offset		// BaseOfCode
 
 extra_header_fields:
@@ -139,7 +138,7 @@ extra_header_fields:
 	.short	0				// MinorSubsystemVersion
 	.long	0				// Win32VersionValue
 
-	.long	_end - efi_head			// SizeOfImage
+	.long	_end - _head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
 	.long	__efistub_stext_offset		// SizeOfHeaders
@@ -219,11 +218,13 @@ ENTRY(stext)
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
-	ldr	x27, =__mmap_switched		// address to jump to after
+	ldr	x27, 0f				// address to jump to after
 						// MMU has been enabled
 	adr_l	lr, __enable_mmu		// return (PIC) address
 	b	__cpu_setup			// initialise processor
 ENDPROC(stext)
+	.align	3
+0:	.quad	__mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
 
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
@@ -391,7 +392,8 @@ __create_page_tables:
 	mov	x0, x26				// swapper_pg_dir
 	ldr	x5, =KIMAGE_VADDR
 	create_pgd_entry x0, x5, x3, x6
-	ldr	x6, =KERNEL_END			// __va(KERNEL_END)
+	ldr	w6, kernel_img_size
+	add	x6, x6, x5
 	mov	x3, x24				// phys offset
 	create_block_map x0, x7, x3, x5, x6
 
@@ -408,6 +410,9 @@ __create_page_tables:
 	mov	lr, x27
 	ret
 ENDPROC(__create_page_tables)
+
+kernel_img_size:
+	.long	_end - (_head - TEXT_OFFSET)
 	.ltorg
 
 /*
@@ -415,6 +420,10 @@ ENDPROC(__create_page_tables)
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
+	adr_l	x8, vectors			// load VBAR_EL1 with virtual
+	msr	vbar_el1, x8			// vector table address
+	isb
+
 	// Clear BSS
 	adr_l	x0, __bss_start
 	mov	x1, xzr
@@ -610,13 +619,19 @@ ENTRY(secondary_startup)
 	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
 
-	ldr	x21, =secondary_data
-	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU
+	ldr	x8, =KIMAGE_VADDR
+	ldr	w9, 0f
+	sub	x27, x8, w9, sxtw		// address to jump to after enabling the MMU
 	b	__enable_mmu
 ENDPROC(secondary_startup)
+0:	.long	(_text - TEXT_OFFSET) - __secondary_switched
 
 ENTRY(__secondary_switched)
-	ldr	x0, [x21]			// get secondary_data.stack
+	adr_l	x5, vectors
+	msr	vbar_el1, x5
+	isb
+
+	ldr_l	x0, secondary_data		// get secondary_data.stack
 	mov	sp, x0
 	and	x0, x0, #~(THREAD_SIZE - 1)
 	msr	sp_el0, x0			// save thread_info
@@ -641,8 +656,6 @@ __enable_mmu:
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
 	b.ne	__no_granule_support
-	ldr	x5, =vectors
-	msr	vbar_el1, x5
 	msr	ttbr0_el1, x25			// load TTBR0
 	msr	ttbr1_el1, x26			// load TTBR1
 	isb

From 632fd2f00cb569f65adc7d2c67282b70a6af8634 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 17:08:26 +0100
Subject: [PATCH 776/797] arm64: make asm/elf.h available to asm files

This reshuffles some code in asm/elf.h and puts an #ifndef __ASSEMBLY__
around its C definitions so that the CPP defines can be used in asm
source files as well.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 4a2e034e5cdadde4c712f79bdd57d1455c76a3db)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/elf.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index faad6df49e5b..435f55952e1f 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -24,15 +24,6 @@
 #include <asm/ptrace.h>
 #include <asm/user.h>
 
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs)	\
-	*(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef struct user_fpsimd_state elf_fpregset_t;
-
 /*
  * AArch64 static relocation types.
  */
@@ -127,6 +118,17 @@ typedef struct user_fpsimd_state elf_fpregset_t;
  */
 #define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
 
+#ifndef __ASSEMBLY__
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs)	\
+	*(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
+
 /*
  * When the program starts, a1 contains a pointer to a function to be
  * registered with atexit, as per the SVR4 ABI.  A value of 0 means we have no
@@ -186,4 +188,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
 
 #endif /* CONFIG_COMPAT */
 
+#endif /* !__ASSEMBLY__ */
+
 #endif

From 6ef77fd5962d90de7957ef9eafddd659c7375a4f Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 2 Feb 2016 15:53:59 +0000
Subject: [PATCH 777/797] arm64: futex.h: Add missing PAN toggling

futex.h's futex_atomic_cmpxchg_inatomic() does not use the
__futex_atomic_op() macro and needs its own PAN toggling. This was missed
when the feature was implemented.

Fixes: 338d4f49d6f ("arm64: kernel: Add support for Privileged Access Never")
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 811d61e384e24759372bb3f01772f3744b0a8327)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/futex.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 007a69fc4f40..5f3ab8c1db55 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
@@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "	.align	3\n"
 "	.quad	1b, 4b, 2b, 4b\n"
 "	.popsection\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
 	: "memory");

From afc69bdc2ac79942a59f67296738ba8e85e784fb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sun, 10 Jan 2016 11:42:28 +0100
Subject: [PATCH 778/797] scripts/sortextable: add support for ET_DYN binaries

Add support to scripts/sortextable for handling relocatable (PIE)
executables, whose ELF type is ET_DYN, not ET_EXEC. Other than adding
support for the new type, no changes are needed.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 7b957b6e603623ef8b2e8222fa94b976df613fa2)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 scripts/sortextable.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index c2423d913b46..ecefa0a634f8 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -266,9 +266,9 @@ do_file(char const *const fname)
 		break;
 	}  /* end switch */
 	if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0
-	||  r2(&ehdr->e_type) != ET_EXEC
+	||  (r2(&ehdr->e_type) != ET_EXEC && r2(&ehdr->e_type) != ET_DYN)
 	||  ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
-		fprintf(stderr, "unrecognized ET_EXEC file %s\n", fname);
+		fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname);
 		fail_file();
 	}
 
@@ -304,7 +304,7 @@ do_file(char const *const fname)
 		if (r2(&ehdr->e_ehsize) != sizeof(Elf32_Ehdr)
 		||  r2(&ehdr->e_shentsize) != sizeof(Elf32_Shdr)) {
 			fprintf(stderr,
-				"unrecognized ET_EXEC file: %s\n", fname);
+				"unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
 			fail_file();
 		}
 		do32(ehdr, fname, custom_sort);
@@ -314,7 +314,7 @@ do_file(char const *const fname)
 		if (r2(&ghdr->e_ehsize) != sizeof(Elf64_Ehdr)
 		||  r2(&ghdr->e_shentsize) != sizeof(Elf64_Shdr)) {
 			fprintf(stderr,
-				"unrecognized ET_EXEC file: %s\n", fname);
+				"unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
 			fail_file();
 		}
 		do64(ghdr, fname, custom_sort);

From 5f1dcb6d02a6f6dd104539a19f7db8a189ba4bb8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 1 Jan 2016 12:39:09 +0100
Subject: [PATCH 779/797] extable: add support for relative extables to search
 and sort routines

This adds support to the generic search_extable() and sort_extable()
implementations for dealing with exception table entries whose fields
contain relative offsets rather than absolute addresses.

Acked-by: Helge Deller <deller@gmx.de>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a272858a3c1ecd4a935ba23c66668f81214bd110)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 lib/extable.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/lib/extable.c b/lib/extable.c
index 4cac81ec225e..0be02ad561e9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -14,7 +14,37 @@
 #include <linux/sort.h>
 #include <asm/uaccess.h>
 
+#ifndef ARCH_HAS_RELATIVE_EXTABLE
+#define ex_to_insn(x)	((x)->insn)
+#else
+static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn;
+}
+#endif
+
 #ifndef ARCH_HAS_SORT_EXTABLE
+#ifndef ARCH_HAS_RELATIVE_EXTABLE
+#define swap_ex		NULL
+#else
+static void swap_ex(void *a, void *b, int size)
+{
+	struct exception_table_entry *x = a, *y = b, tmp;
+	int delta = b - a;
+
+	tmp = *x;
+	x->insn = y->insn + delta;
+	y->insn = tmp.insn - delta;
+
+#ifdef swap_ex_entry_fixup
+	swap_ex_entry_fixup(x, y, tmp, delta);
+#else
+	x->fixup = y->fixup + delta;
+	y->fixup = tmp.fixup - delta;
+#endif
+}
+#endif /* ARCH_HAS_RELATIVE_EXTABLE */
+
 /*
  * The exception table needs to be sorted so that the binary
  * search that we use to find entries in it works properly.
@@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b)
 	const struct exception_table_entry *x = a, *y = b;
 
 	/* avoid overflow */
-	if (x->insn > y->insn)
+	if (ex_to_insn(x) > ex_to_insn(y))
 		return 1;
-	if (x->insn < y->insn)
+	if (ex_to_insn(x) < ex_to_insn(y))
 		return -1;
 	return 0;
 }
@@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish)
 {
 	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, NULL);
+	     cmp_ex, swap_ex);
 }
 
 #ifdef CONFIG_MODULES
@@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start,
 void trim_init_extable(struct module *m)
 {
 	/*trim the beginning*/
-	while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+	while (m->num_exentries &&
+	       within_module_init(ex_to_insn(&m->extable[0]), m)) {
 		m->extable++;
 		m->num_exentries--;
 	}
 	/*trim the end*/
 	while (m->num_exentries &&
-		within_module_init(m->extable[m->num_exentries-1].insn, m))
+	       within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]),
+				  m))
 		m->num_exentries--;
 }
 #endif /* CONFIG_MODULES */
@@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first,
 		 * careful, the distance between value and insn
 		 * can be larger than MAX_LONG:
 		 */
-		if (mid->insn < value)
+		if (ex_to_insn(mid) < value)
 			first = mid + 1;
-		else if (mid->insn > value)
+		else if (ex_to_insn(mid) > value)
 			last = mid - 1;
 		else
 			return mid;
-        }
-        return NULL;
+	}
+	return NULL;
 }
 #endif

From 525787eea48f8c6a1630e6dab07313896cfc6b8d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 1 Jan 2016 15:02:12 +0100
Subject: [PATCH 780/797] arm64: switch to relative exception tables

Instead of using absolute addresses for both the exception location
and the fixup, use offsets relative to the exception table entry values.
Not only does this cut the size of the exception table in half, it is
also a prerequisite for KASLR, since absolute exception table entries
are subject to dynamic relocation, which is incompatible with the sorting
of the exception table that occurs at build time.

This patch also introduces the _ASM_EXTABLE preprocessor macro (which
exists on x86 as well) and its _asm_extable assembly counterpart, as
shorthands to emit exception table entries.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6c94f27ac847ff8ef15b3da5b200574923bd6287)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/alternative.h    | 19 +++++-----------
 arch/arm64/include/asm/assembler.h      | 15 +++++++++----
 arch/arm64/include/asm/futex.h          | 12 ++++------
 arch/arm64/include/asm/uaccess.h        | 30 +++++++++++++------------
 arch/arm64/include/asm/word-at-a-time.h |  7 +++---
 arch/arm64/kernel/armv8_deprecated.c    |  7 ++----
 arch/arm64/mm/extable.c                 |  2 +-
 scripts/sortextable.c                   |  2 +-
 8 files changed, 43 insertions(+), 51 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index a9fc24ec1aa9..beccbdefa106 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -157,11 +157,8 @@ void apply_alternatives(void *start, size_t length);
 			add	\addr, \addr, \post_inc;
 		alternative_endif
 
-		.section __ex_table,"a";
-		.align	3;
-		.quad	8888b,\l;
-		.quad	8889b,\l;
-		.previous;
+		_asm_extable	8888b,\l;
+		_asm_extable	8889b,\l;
 	.endm
 
 	.macro uao_stp l, reg1, reg2, addr, post_inc
@@ -175,11 +172,8 @@ void apply_alternatives(void *start, size_t length);
 			add	\addr, \addr, \post_inc;
 		alternative_endif
 
-		.section __ex_table,"a";
-		.align	3;
-		.quad	8888b,\l;
-		.quad	8889b,\l;
-		.previous
+		_asm_extable	8888b,\l;
+		_asm_extable	8889b,\l;
 	.endm
 
 	.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
@@ -191,10 +185,7 @@ void apply_alternatives(void *start, size_t length);
 			add		\addr, \addr, \post_inc;
 		alternative_endif
 
-		.section __ex_table,"a";
-		.align	3;
-		.quad	8888b,\l;
-		.previous
+		_asm_extable	8888b,\l;
 	.endm
 #else
 	.macro uao_ldp l, reg1, reg2, addr, post_inc
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ba5aff6c830e..70f7b9e04598 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -94,12 +94,19 @@
 	dmb	\opt
 	.endm
 
+/*
+ * Emit an entry into the exception table
+ */
+	.macro		_asm_extable, from, to
+	.pushsection	__ex_table, "a"
+	.align		3
+	.long		(\from - .), (\to - .)
+	.popsection
+	.endm
+
 #define USER(l, x...)				\
 9999:	x;					\
-	.section __ex_table,"a";		\
-	.align	3;				\
-	.quad	9999b,l;			\
-	.previous
+	_asm_extable	9999b, l
 
 /*
  * Register aliases.
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5f3ab8c1db55..f2585cdd32c2 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -42,10 +42,8 @@
 "4:	mov	%w0, %w5\n"						\
 "	b	3b\n"							\
 "	.popsection\n"							\
-"	.pushsection __ex_table,\"a\"\n"				\
-"	.align	3\n"							\
-"	.quad	1b, 4b, 2b, 4b\n"					\
-"	.popsection\n"							\
+	_ASM_EXTABLE(1b, 4b)						\
+	_ASM_EXTABLE(2b, 4b)						\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,		\
 		    CONFIG_ARM64_PAN)					\
 	: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)	\
@@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "4:	mov	%w0, %w6\n"
 "	b	3b\n"
 "	.popsection\n"
-"	.pushsection __ex_table,\"a\"\n"
-"	.align	3\n"
-"	.quad	1b, 4b, 2b, 4b\n"
-"	.popsection\n"
+	_ASM_EXTABLE(1b, 4b)
+	_ASM_EXTABLE(2b, 4b)
 ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 16ba0d5c9740..0685d74572af 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -36,11 +36,11 @@
 #define VERIFY_WRITE 1
 
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -50,9 +50,11 @@
 
 struct exception_table_entry
 {
-	unsigned long insn, fixup;
+	int insn, fixup;
 };
 
+#define ARCH_HAS_RELATIVE_EXTABLE
+
 extern int fixup_exception(struct pt_regs *regs);
 
 #define KERNEL_DS	(-1UL)
@@ -115,6 +117,12 @@ static inline void set_fs(mm_segment_t fs)
 #define access_ok(type, addr, size)	__range_ok(addr, size)
 #define user_addr_max			get_fs
 
+#define _ASM_EXTABLE(from, to)						\
+	"	.pushsection	__ex_table, \"a\"\n"			\
+	"	.align		3\n"					\
+	"	.long		(" #from " - .), (" #to " - .)\n"	\
+	"	.popsection\n"
+
 /*
  * The "__xxx" versions of the user access functions do not verify the address
  * space - it must have been done previously with a separate "access_ok()"
@@ -134,10 +142,7 @@ static inline void set_fs(mm_segment_t fs)
 	"	mov	%1, #0\n"					\
 	"	b	2b\n"						\
 	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.quad	1b, 3b\n"					\
-	"	.previous"						\
+	_ASM_EXTABLE(1b, 3b)						\
 	: "+r" (err), "=&r" (x)						\
 	: "r" (addr), "i" (-EFAULT))
 
@@ -206,10 +211,7 @@ do {									\
 	"3:	mov	%w0, %3\n"					\
 	"	b	2b\n"						\
 	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.quad	1b, 3b\n"					\
-	"	.previous"						\
+	_ASM_EXTABLE(1b, 3b)						\
 	: "+r" (err)							\
 	: "r" (x), "r" (addr), "i" (-EFAULT))
 
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index aab5bf09e9d9..2b79b8a89457 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -16,6 +16,8 @@
 #ifndef __ASM_WORD_AT_A_TIME_H
 #define __ASM_WORD_AT_A_TIME_H
 
+#include <asm/uaccess.h>
+
 #ifndef __AARCH64EB__
 
 #include <linux/kernel.h>
@@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 #endif
 	"	b	2b\n"
 	"	.popsection\n"
-	"	.pushsection __ex_table,\"a\"\n"
-	"	.align	3\n"
-	"	.quad	1b, 3b\n"
-	"	.popsection"
+	_ASM_EXTABLE(1b, 3b)
 	: "=&r" (ret), "=&r" (offset)
 	: "r" (addr), "Q" (*(unsigned long *)addr));
 
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 3e01207917b1..c37202c0c838 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
 	"4:	mov		%w0, %w5\n"			\
 	"	b		3b\n"				\
 	"	.popsection"					\
-	"	.pushsection	 __ex_table,\"a\"\n"		\
-	"	.align		3\n"				\
-	"	.quad		0b, 4b\n"			\
-	"	.quad		1b, 4b\n"			\
-	"	.popsection\n"					\
+	_ASM_EXTABLE(0b, 4b)					\
+	_ASM_EXTABLE(1b, 4b)					\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
 		CONFIG_ARM64_PAN)				\
 	: "=&r" (res), "+r" (data), "=&r" (temp)		\
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 79444279ba8c..81acd4706878 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
 
 	fixup = search_exception_tables(instruction_pointer(regs));
 	if (fixup)
-		regs->pc = fixup->fixup;
+		regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
 
 	return fixup != NULL;
 }
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index ecefa0a634f8..19d83647846c 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -282,12 +282,12 @@ do_file(char const *const fname)
 	case EM_386:
 	case EM_X86_64:
 	case EM_S390:
+	case EM_AARCH64:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT:
 	case EM_ARCV2:
 	case EM_ARM:
-	case EM_AARCH64:
 	case EM_MICROBLAZE:
 	case EM_MIPS:
 	case EM_XTENSA:

From 89328d41aa99df071dadb43c722cd88ffafc77e2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 26 Jan 2016 09:13:44 +0100
Subject: [PATCH 781/797] arm64: add support for building vmlinux as a
 relocatable PIE binary

This implements CONFIG_RELOCATABLE, which links the final vmlinux
image with a dynamic relocation section, allowing the early boot code
to perform a relocation to a different virtual address at runtime.

This is a prerequisite for KASLR (CONFIG_RANDOMIZE_BASE).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 1e48ef7fcc374051730381a2a05da77eb4eafdb0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig              | 11 +++++++++++
 arch/arm64/Makefile             |  4 ++++
 arch/arm64/include/asm/elf.h    |  2 ++
 arch/arm64/kernel/head.S        | 32 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S | 16 ++++++++++++++++
 5 files changed, 65 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 22db20491733..ac1475f559e6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -737,6 +737,17 @@ config ARM64_MODULE_PLTS
 	select ARM64_MODULE_CMODEL_LARGE
 	select HAVE_MOD_ARCH_SPECIFIC
 
+config RELOCATABLE
+	bool
+	help
+	  This builds the kernel as a Position Independent Executable (PIE),
+	  which retains all relocation metadata required to relocate the
+	  kernel binary at runtime to a different virtual address than the
+	  address it was linked at.
+	  Since AArch64 uses the RELA relocation format, this requires a
+	  relocation pass at runtime even if the kernel is loaded at the
+	  same address it was linked at.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 71054a38decf..304dcc3da06f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 OBJCOPYFLAGS	:=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 GZFLAGS		:=-9
 
+ifneq ($(CONFIG_RELOCATABLE),)
+LDFLAGS_vmlinux		+= -pie
+endif
+
 KBUILD_DEFCONFIG := defconfig
 
 # Check for binutils support for specific extensions
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 435f55952e1f..24ed037f09fd 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -77,6 +77,8 @@
 #define R_AARCH64_MOVW_PREL_G2_NC	292
 #define R_AARCH64_MOVW_PREL_G3		293
 
+#define R_AARCH64_RELATIVE		1027
+
 /*
  * These are used to set parameters in the core dumps.
  */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4cad8f9f2268..4e69412a7323 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,6 +29,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/elf.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
@@ -432,6 +433,37 @@ __mmap_switched:
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
+#ifdef CONFIG_RELOCATABLE
+
+	/*
+	 * Iterate over each entry in the relocation table, and apply the
+	 * relocations in place.
+	 */
+	adr_l	x8, __dynsym_start		// start of symbol table
+	adr_l	x9, __reloc_start		// start of reloc table
+	adr_l	x10, __reloc_end		// end of reloc table
+
+0:	cmp	x9, x10
+	b.hs	2f
+	ldp	x11, x12, [x9], #24
+	ldr	x13, [x9, #-8]
+	cmp	w12, #R_AARCH64_RELATIVE
+	b.ne	1f
+	str	x13, [x11]
+	b	0b
+
+1:	cmp	w12, #R_AARCH64_ABS64
+	b.ne	0b
+	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
+	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
+	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
+	add	x15, x13, x15
+	str	x15, [x11]
+	b	0b
+
+2:
+#endif
+
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
 	and	x4, x4, #~(THREAD_SIZE - 1)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 282e3e64a17e..e3f6cd740ea3 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -87,6 +87,7 @@ SECTIONS
 		EXIT_CALL
 		*(.discard)
 		*(.discard.*)
+		*(.interp .dynamic)
 	}
 
 	. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -149,6 +150,21 @@ SECTIONS
 	.altinstr_replacement : {
 		*(.altinstr_replacement)
 	}
+	.rela : ALIGN(8) {
+		__reloc_start = .;
+		*(.rela .rela*)
+		__reloc_end = .;
+	}
+	.dynsym : ALIGN(8) {
+		__dynsym_start = .;
+		*(.dynsym)
+	}
+	.dynstr : {
+		*(.dynstr)
+	}
+	.hash : {
+		*(.hash)
+	}
 
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;

From d0a12e9199c75cad71361f746ac40e4612945a43 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 26 Jan 2016 14:12:01 +0100
Subject: [PATCH 782/797] arm64: add support for kernel ASLR

This adds support for KASLR, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.

If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.

If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f80fb3a3d50843a401dac4b566b3b131da8077a2)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig              |  29 ++++++
 arch/arm64/include/asm/memory.h |   5 +-
 arch/arm64/include/asm/module.h |   6 ++
 arch/arm64/kernel/Makefile      |   1 +
 arch/arm64/kernel/head.S        |  59 +++++++++--
 arch/arm64/kernel/kaslr.c       | 173 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/module.c      |   3 +-
 arch/arm64/kernel/setup.c       |  29 ++++++
 arch/arm64/mm/kasan_init.c      |  17 +++-
 arch/arm64/mm/mmu.c             |  29 ++++--
 10 files changed, 329 insertions(+), 22 deletions(-)
 create mode 100644 arch/arm64/kernel/kaslr.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ac1475f559e6..b311ac23c989 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -748,6 +748,35 @@ config RELOCATABLE
 	  relocation pass at runtime even if the kernel is loaded at the
 	  same address it was linked at.
 
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	select ARM64_MODULE_PLTS
+	select RELOCATABLE
+	help
+	  Randomizes the virtual address at which the kernel image is
+	  loaded, as a security feature that deters exploit attempts
+	  relying on knowledge of the location of kernel internals.
+
+	  It is the bootloader's job to provide entropy, by passing a
+	  random u64 value in /chosen/kaslr-seed at kernel entry.
+
+	  If unsure, say N.
+
+config RANDOMIZE_MODULE_REGION_FULL
+	bool "Randomize the module region independently from the core kernel"
+	depends on RANDOMIZE_BASE
+	default y
+	help
+	  Randomizes the location of the module region without considering the
+	  location of the core kernel. This way, it is impossible for modules
+	  to leak information about the location of core kernel data structures
+	  but it does imply that function calls between modules and the core
+	  kernel will need to be resolved via veneers in the module PLT.
+
+	  When this option is not set, the module region will be randomized over
+	  a limited range that contains the [_stext, _etext] interval of the
+	  core kernel, so branch relocations are always in range.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index eb798156cf56..5f8667a99e41 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -53,7 +53,7 @@
 #define KIMAGE_VADDR		(MODULES_END)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
-#define MODULES_VSIZE		(SZ_64M)
+#define MODULES_VSIZE		(SZ_128M)
 #define PCI_IO_END		(PAGE_OFFSET - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
@@ -139,6 +139,9 @@ extern phys_addr_t		memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
+/* the virtual base of the kernel image (minus TEXT_OFFSET) */
+extern u64			kimage_vaddr;
+
 /* the offset between the kernel virtual and physical mappings */
 extern u64			kimage_voffset;
 
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 8652fb613304..e12af6754634 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -31,4 +31,10 @@ struct mod_arch_specific {
 u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
 			  Elf64_Sym *sym);
 
+#ifdef CONFIG_RANDOMIZE_BASE
+extern u64 module_alloc_base;
+#else
+#define module_alloc_base	((u64)_etext - MODULES_VSIZE)
+#endif
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 8d971f9c6ed5..49a2430b0786 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -43,6 +43,7 @@ arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
+arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4e69412a7323..319f896c6e74 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -210,6 +210,7 @@ section_table:
 ENTRY(stext)
 	bl	preserve_boot_args
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
+	mov	x23, xzr			// KASLR offset, defaults to 0
 	adrp	x24, __PHYS_OFFSET
 	bl	set_cpu_boot_mode_flag
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
@@ -313,7 +314,7 @@ ENDPROC(preserve_boot_args)
 __create_page_tables:
 	adrp	x25, idmap_pg_dir
 	adrp	x26, swapper_pg_dir
-	mov	x27, lr
+	mov	x28, lr
 
 	/*
 	 * Invalidate the idmap and swapper page tables to avoid potential
@@ -392,6 +393,7 @@ __create_page_tables:
 	 */
 	mov	x0, x26				// swapper_pg_dir
 	ldr	x5, =KIMAGE_VADDR
+	add	x5, x5, x23			// add KASLR displacement
 	create_pgd_entry x0, x5, x3, x6
 	ldr	w6, kernel_img_size
 	add	x6, x6, x5
@@ -408,8 +410,7 @@ __create_page_tables:
 	dmb	sy
 	bl	__inval_cache_range
 
-	mov	lr, x27
-	ret
+	ret	x28
 ENDPROC(__create_page_tables)
 
 kernel_img_size:
@@ -421,6 +422,7 @@ kernel_img_size:
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
+	mov	x28, lr				// preserve LR
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
 	isb
@@ -449,19 +451,26 @@ __mmap_switched:
 	ldr	x13, [x9, #-8]
 	cmp	w12, #R_AARCH64_RELATIVE
 	b.ne	1f
-	str	x13, [x11]
+	add	x13, x13, x23			// relocate
+	str	x13, [x11, x23]
 	b	0b
 
 1:	cmp	w12, #R_AARCH64_ABS64
 	b.ne	0b
 	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
 	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
+	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
 	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
+	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
+	add	x14, x15, x23			// relocate
+	csel	x15, x14, x15, ne
 	add	x15, x13, x15
-	str	x15, [x11]
+	str	x15, [x11, x23]
 	b	0b
 
-2:
+2:	adr_l	x8, kimage_vaddr		// make relocated kimage_vaddr
+	dc	cvac, x8			// value visible to secondaries
+	dsb	sy				// with MMU off
 #endif
 
 	adr_l	sp, initial_sp, x4
@@ -470,13 +479,23 @@ __mmap_switched:
 	msr	sp_el0, x4			// Save thread_info
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 
-	ldr	x4, =KIMAGE_VADDR		// Save the offset between
+	ldr_l	x4, kimage_vaddr		// Save the offset between
 	sub	x4, x4, x24			// the kernel virtual and
 	str_l	x4, kimage_voffset, x5		// physical mappings
 
 	mov	x29, #0
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
+#endif
+#ifdef CONFIG_RANDOMIZE_BASE
+	cbnz	x23, 0f				// already running randomized?
+	mov	x0, x21				// pass FDT address in x0
+	bl	kaslr_early_init		// parse FDT for KASLR options
+	cbz	x0, 0f				// KASLR disabled? just proceed
+	mov	x23, x0				// record KASLR offset
+	ret	x28				// we must enable KASLR, return
+						// to __enable_mmu()
+0:
 #endif
 	b	start_kernel
 ENDPROC(__mmap_switched)
@@ -486,6 +505,10 @@ ENDPROC(__mmap_switched)
  * hotplug and needs to have the same protections as the text region
  */
 	.section ".text","ax"
+
+ENTRY(kimage_vaddr)
+	.quad		_text - TEXT_OFFSET
+
 /*
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
@@ -651,7 +674,7 @@ ENTRY(secondary_startup)
 	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
 
-	ldr	x8, =KIMAGE_VADDR
+	ldr	x8, kimage_vaddr
 	ldr	w9, 0f
 	sub	x27, x8, w9, sxtw		// address to jump to after enabling the MMU
 	b	__enable_mmu
@@ -684,6 +707,7 @@ ENDPROC(__secondary_switched)
  */
 	.section	".idmap.text", "ax"
 __enable_mmu:
+	mrs	x18, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -701,6 +725,25 @@ __enable_mmu:
 	ic	iallu
 	dsb	nsh
 	isb
+#ifdef CONFIG_RANDOMIZE_BASE
+	mov	x19, x0				// preserve new SCTLR_EL1 value
+	blr	x27
+
+	/*
+	 * If we return here, we have a KASLR displacement in x23 which we need
+	 * to take into account by discarding the current kernel mapping and
+	 * creating a new one.
+	 */
+	msr	sctlr_el1, x18			// disable the MMU
+	isb
+	bl	__create_page_tables		// recreate kernel mapping
+
+	msr	sctlr_el1, x19			// re-enable the MMU
+	isb
+	ic	ialluis				// flush instructions fetched
+	isb					// via old mapping
+	add	x27, x27, x23			// relocated __mmap_switched
+#endif
 	br	x27
 ENDPROC(__enable_mmu)
 
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
new file mode 100644
index 000000000000..8b32a1f8f09f
--- /dev/null
+++ b/arch/arm64/kernel/kaslr.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+
+u64 __read_mostly module_alloc_base;
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	u64 *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+static __init const u8 *get_cmdline(void *fdt)
+{
+	static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		int node;
+		const u8 *prop;
+
+		node = fdt_path_offset(fdt, "/chosen");
+		if (node < 0)
+			goto out;
+
+		prop = fdt_getprop(fdt, node, "bootargs", NULL);
+		if (!prop)
+			goto out;
+		return prop;
+	}
+out:
+	return default_cmdline;
+}
+
+extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
+				       pgprot_t prot);
+
+/*
+ * This routine will be executed with the kernel mapped at its default virtual
+ * address, and if it returns successfully, the kernel will be remapped, and
+ * start_kernel() will be executed from a randomized virtual offset. The
+ * relocation will result in all absolute references (e.g., static variables
+ * containing function pointers) to be reinitialized, and zero-initialized
+ * .bss variables will be reset to 0.
+ */
+u64 __init kaslr_early_init(u64 dt_phys)
+{
+	void *fdt;
+	u64 seed, offset, mask, module_range;
+	const u8 *cmdline, *str;
+	int size;
+
+	/*
+	 * Set a reasonable default for module_alloc_base in case
+	 * we end up running with module randomization disabled.
+	 */
+	module_alloc_base = (u64)_etext - MODULES_VSIZE;
+
+	/*
+	 * Try to map the FDT early. If this fails, we simply bail,
+	 * and proceed with KASLR disabled. We will make another
+	 * attempt at mapping the FDT in setup_machine()
+	 */
+	early_fixmap_init();
+	fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+	if (!fdt)
+		return 0;
+
+	/*
+	 * Retrieve (and wipe) the seed from the FDT
+	 */
+	seed = get_kaslr_seed(fdt);
+	if (!seed)
+		return 0;
+
+	/*
+	 * Check if 'nokaslr' appears on the command line, and
+	 * return 0 if that is the case.
+	 */
+	cmdline = get_cmdline(fdt);
+	str = strstr(cmdline, "nokaslr");
+	if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+		return 0;
+
+	/*
+	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+	 * kernel image offset from the seed. Let's place the kernel in the
+	 * lower half of the VMALLOC area (VA_BITS - 2).
+	 * Even if we could randomize at page granularity for 16k and 64k pages,
+	 * let's always round to 2 MB so we don't interfere with the ability to
+	 * map using contiguous PTEs
+	 */
+	mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
+	offset = seed & mask;
+
+	/*
+	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
+	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
+	 * happens, increase the KASLR offset by the size of the kernel image.
+	 */
+	if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
+	    (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+		offset = (offset + (u64)(_end - _text)) & mask;
+
+	if (IS_ENABLED(CONFIG_KASAN))
+		/*
+		 * KASAN does not expect the module region to intersect the
+		 * vmalloc region, since shadow memory is allocated for each
+		 * module at load time, whereas the vmalloc region is shadowed
+		 * by KASAN zero pages. So keep modules out of the vmalloc
+		 * region if KASAN is enabled.
+		 */
+		return offset;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+		/*
+		 * Randomize the module region independently from the core
+		 * kernel. This prevents modules from leaking any information
+		 * about the address of the kernel itself, but results in
+		 * branches between modules and the core kernel that are
+		 * resolved via PLTs. (Branches between modules will be
+		 * resolved normally.)
+		 */
+		module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
+		module_alloc_base = VMALLOC_START;
+	} else {
+		/*
+		 * Randomize the module region by setting module_alloc_base to
+		 * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+		 * _stext) . This guarantees that the resulting region still
+		 * covers [_stext, _etext], and that all relative branches can
+		 * be resolved without veneers.
+		 */
+		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+		module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
+	}
+
+	/* use the lower 21 bits to randomize the base of the module region */
+	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
+	module_alloc_base &= PAGE_MASK;
+
+	return offset;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index a9dde97f5ca5..7f316982ce00 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -34,7 +34,8 @@ void *module_alloc(unsigned long size)
 {
 	void *p;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+				module_alloc_base + MODULES_VSIZE,
 				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index cfed56f0ad26..42371f69def3 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -388,3 +388,32 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
+
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+			      void *p)
+{
+	u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
+		pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
+			 kaslr_offset, KIMAGE_VADDR);
+	} else {
+		pr_emerg("Kernel Offset: disabled\n");
+	}
+	return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+	.notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &kernel_offset_notifier);
+	return 0;
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 7f10cc91fa8a..56e19d150c21 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -129,12 +129,16 @@ static void __init clear_pgds(unsigned long start,
 void __init kasan_init(void)
 {
 	u64 kimg_shadow_start, kimg_shadow_end;
+	u64 mod_shadow_start, mod_shadow_end;
 	struct memblock_region *reg;
 	int i;
 
 	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
 	kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
 
+	mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
+	mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
+
 	/*
 	 * We are going to perform proper setup of shadow memory.
 	 * At first we should unmap early shadow (clear_pgds() call bellow).
@@ -158,13 +162,20 @@ void __init kasan_init(void)
 	 * with PMD table mappings at the edges of the shadow region for the
 	 * kernel image.
 	 */
-	if (ARM64_SWAPPER_USES_SECTION_MAPS)
+	if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+		kimg_shadow_start = round_down(kimg_shadow_start,
+					       SWAPPER_BLOCK_SIZE);
 		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+	}
 
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
-			kasan_mem_to_shadow((void *)MODULES_VADDR));
+				   (void *)mod_shadow_start);
 	kasan_populate_zero_shadow((void *)kimg_shadow_end,
-			kasan_mem_to_shadow((void *)PAGE_OFFSET));
+				   kasan_mem_to_shadow((void *)PAGE_OFFSET));
+
+	if (kimg_shadow_start > mod_shadow_end)
+		kasan_populate_zero_shadow((void *)mod_shadow_end,
+					   (void *)kimg_shadow_start);
 
 	for_each_memblock(memory, reg) {
 		void *start = (void *)__phys_to_virt(reg->base);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index fb5c872fe3d6..ff0f5a46b552 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -676,7 +676,8 @@ void __init early_fixmap_init(void)
 	unsigned long addr = FIXADDR_START;
 
 	pgd = pgd_offset_k(addr);
-	if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) {
+	if (CONFIG_PGTABLE_LEVELS > 3 &&
+	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
 		/*
 		 * We only end up here if the kernel mapping and the fixmap
 		 * share the top level pgd entry, which should only happen on
@@ -733,11 +734,10 @@ void __set_fixmap(enum fixed_addresses idx,
 	}
 }
 
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 {
 	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
-	pgprot_t prot = PAGE_KERNEL_RO;
-	int size, offset;
+	int offset;
 	void *dt_virt;
 
 	/*
@@ -776,16 +776,27 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
 
-	size = fdt_totalsize(dt_virt);
-	if (size > MAX_FDT_SIZE)
+	*size = fdt_totalsize(dt_virt);
+	if (*size > MAX_FDT_SIZE)
 		return NULL;
 
-	if (offset + size > SWAPPER_BLOCK_SIZE)
+	if (offset + *size > SWAPPER_BLOCK_SIZE)
 		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+			       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
+
+	return dt_virt;
+}
+
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+	void *dt_virt;
+	int size;
+
+	dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
+	if (!dt_virt)
+		return NULL;
 
 	memblock_reserve(dt_phys, size);
-
 	return dt_virt;
 }
 

From 98e23ea3a3dd23269a69282291f9bef53e262bc2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 29 Jan 2016 11:59:03 +0100
Subject: [PATCH 783/797] arm64: kaslr: randomize the linear region

When KASLR is enabled (CONFIG_RANDOMIZE_BASE=y), and entropy has been
provided by the bootloader, randomize the placement of RAM inside the
linear region if sufficient space is available. For instance, on a 4KB
granule/3 levels kernel, the linear region is 256 GB in size, and we can
choose any 1 GB aligned offset that is far enough from the top of the
address space to fit the distance between the start of the lowest memblock
and the top of the highest memblock.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c031a4213c11a5db475f528c182f7b3858df11db)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/kaslr.c |  4 ++++
 arch/arm64/mm/init.c      | 22 ++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 8b32a1f8f09f..582983920054 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -21,6 +21,7 @@
 #include <asm/sections.h>
 
 u64 __read_mostly module_alloc_base;
+u16 __initdata memstart_offset_seed;
 
 static __init u64 get_kaslr_seed(void *fdt)
 {
@@ -123,6 +124,9 @@ u64 __init kaslr_early_init(u64 dt_phys)
 	mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
 	offset = seed & mask;
 
+	/* use the top 16 bits to randomize the linear region */
+	memstart_offset_seed = seed >> 48;
+
 	/*
 	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
 	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2c7a3c2868e4..58a6d3f7525c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -196,6 +196,23 @@ void __init arm64_memblock_init(void)
 		memblock_add(__pa(_text), (u64)(_end - _text));
 	}
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		extern u16 memstart_offset_seed;
+		u64 range = linear_region_size -
+			    (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+
+		/*
+		 * If the size of the linear region exceeds, by a sufficient
+		 * margin, the size of the region that the available physical
+		 * memory spans, randomize the linear region as well.
+		 */
+		if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+			range = range / ARM64_MEMSTART_ALIGN + 1;
+			memstart_addr -= ARM64_MEMSTART_ALIGN *
+					 ((range * memstart_offset_seed) >> 16);
+		}
+	}
+
 	/*
 	 * Register the kernel text, kernel data, initrd, and initial
 	 * pagetables with memblock.
@@ -365,12 +382,13 @@ void __init mem_init(void)
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG(VMEMMAP_START,
 		      VMEMMAP_START + VMEMMAP_SIZE),
-		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+		  MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
 		      (unsigned long)virt_to_page(high_memory)),
 #endif
 		  MLK(FIXADDR_START, FIXADDR_TOP),
 		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(PAGE_OFFSET, (unsigned long)high_memory));
+		  MLM(__phys_to_virt(memblock_start_of_DRAM()),
+		      (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM

From ee6457583818600e4c9b7f3a09d358d6ae3727b8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sun, 10 Jan 2016 11:29:07 +0100
Subject: [PATCH 784/797] efi: stub: implement efi_get_random_bytes() based on
 EFI_RNG_PROTOCOL

This exposes the firmware's implementation of EFI_RNG_PROTOCOL via a new
function efi_get_random_bytes().

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit e4fbf4767440472f9d23b0f25a2b905e1c63b6a8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/firmware/efi/libstub/Makefile  |  3 ++-
 drivers/firmware/efi/libstub/efistub.h |  3 +++
 drivers/firmware/efi/libstub/random.c  | 35 ++++++++++++++++++++++++++
 include/linux/efi.h                    |  6 ++++-
 4 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 drivers/firmware/efi/libstub/random.c

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index c0ddd1b8dca3..c4098748e1fe 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -34,7 +34,8 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o string.o \
 				   $(patsubst %.c,lib-%.o,$(arm-deps))
 
-lib-$(CONFIG_ARM64)		+= arm64-stub.o
+lib-$(CONFIG_ARM)		+= arm32-stub.o
+lib-$(CONFIG_ARM64)		+= arm64-stub.o random.o
 CFLAGS_arm64-stub.o 		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 6b6548fda089..206b7252b9d1 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -43,4 +43,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
 		     unsigned long desc_size, efi_memory_desc_t *runtime_map,
 		     int *count);
 
+efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table,
+				  unsigned long size, u8 *out);
+
 #endif
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
new file mode 100644
index 000000000000..97941ee5954f
--- /dev/null
+++ b/drivers/firmware/efi/libstub/random.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd;  <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+
+struct efi_rng_protocol {
+	efi_status_t (*get_info)(struct efi_rng_protocol *,
+				 unsigned long *, efi_guid_t *);
+	efi_status_t (*get_rng)(struct efi_rng_protocol *,
+				efi_guid_t *, unsigned long, u8 *out);
+};
+
+efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg,
+				  unsigned long size, u8 *out)
+{
+	efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
+	efi_status_t status;
+	struct efi_rng_protocol *rng;
+
+	status = efi_call_early(locate_protocol, &rng_proto, NULL,
+				(void **)&rng);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	return rng->get_rng(rng, NULL, size, out);
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 47be3ad7d3e5..333d0ca6940f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -299,7 +299,7 @@ typedef struct {
 	void *open_protocol_information;
 	void *protocols_per_handle;
 	void *locate_handle_buffer;
-	void *locate_protocol;
+	efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **);
 	void *install_multiple_protocol_interfaces;
 	void *uninstall_multiple_protocol_interfaces;
 	void *calculate_crc32;
@@ -599,6 +599,10 @@ void efi_native_runtime_setup(void);
 #define EFI_PROPERTIES_TABLE_GUID \
     EFI_GUID(  0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 )
 
+#define EFI_RNG_PROTOCOL_GUID \
+	EFI_GUID(0x3152bca5, 0xeade, 0x433d, \
+		 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;

From 0f01a865b4feb17ff014717ed2745a845e0c0ee3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 10:43:16 +0100
Subject: [PATCH 785/797] efi: stub: add implementation of efi_random_alloc()

This implements efi_random_alloc(), which allocates a chunk of memory of
a certain size at a certain alignment, and uses the random_seed argument
it receives to randomize the address of the allocation.

This is implemented by iterating over the UEFI memory map, counting the
number of suitable slots (aligned offsets) within each region, and picking
a random number between 0 and 'number of slots - 1' to select the slot.
This should guarantee that each possible offset is equally likely to be chosen.

Suggested-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 2ddbfc81eac84a299cb4747a8764bc43f23e9008)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/firmware/efi/libstub/efistub.h |   4 +
 drivers/firmware/efi/libstub/random.c  | 100 +++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 206b7252b9d1..5ed3d3f38166 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -46,4 +46,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
 efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table,
 				  unsigned long size, u8 *out);
 
+efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+			      unsigned long size, unsigned long align,
+			      unsigned long *addr, unsigned long random_seed);
+
 #endif
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
index 97941ee5954f..53f6d3fe6d86 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -33,3 +33,103 @@ efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg,
 
 	return rng->get_rng(rng, NULL, size, out);
 }
+
+/*
+ * Return the number of slots covered by this entry, i.e., the number of
+ * addresses it covers that are suitably aligned and supply enough room
+ * for the allocation.
+ */
+static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
+					 unsigned long size,
+					 unsigned long align)
+{
+	u64 start, end;
+
+	if (md->type != EFI_CONVENTIONAL_MEMORY)
+		return 0;
+
+	start = round_up(md->phys_addr, align);
+	end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size,
+			 align);
+
+	if (start > end)
+		return 0;
+
+	return (end - start + 1) / align;
+}
+
+/*
+ * The UEFI memory descriptors have a virtual address field that is only used
+ * when installing the virtual mapping using SetVirtualAddressMap(). Since it
+ * is unused here, we can reuse it to keep track of each descriptor's slot
+ * count.
+ */
+#define MD_NUM_SLOTS(md)	((md)->virt_addr)
+
+efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+			      unsigned long size,
+			      unsigned long align,
+			      unsigned long *addr,
+			      unsigned long random_seed)
+{
+	unsigned long map_size, desc_size, total_slots = 0, target_slot;
+	efi_status_t status;
+	efi_memory_desc_t *memory_map;
+	int map_offset;
+
+	status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size,
+				    &desc_size, NULL, NULL);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	if (align < EFI_ALLOC_ALIGN)
+		align = EFI_ALLOC_ALIGN;
+
+	/* count the suitable slots in each memory map entry */
+	for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+		efi_memory_desc_t *md = (void *)memory_map + map_offset;
+		unsigned long slots;
+
+		slots = get_entry_num_slots(md, size, align);
+		MD_NUM_SLOTS(md) = slots;
+		total_slots += slots;
+	}
+
+	/* find a random number between 0 and total_slots */
+	target_slot = (total_slots * (u16)random_seed) >> 16;
+
+	/*
+	 * target_slot is now a value in the range [0, total_slots), and so
+	 * it corresponds with exactly one of the suitable slots we recorded
+	 * when iterating over the memory map the first time around.
+	 *
+	 * So iterate over the memory map again, subtracting the number of
+	 * slots of each entry at each iteration, until we have found the entry
+	 * that covers our chosen slot. Use the residual value of target_slot
+	 * to calculate the randomly chosen address, and allocate it directly
+	 * using EFI_ALLOCATE_ADDRESS.
+	 */
+	for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+		efi_memory_desc_t *md = (void *)memory_map + map_offset;
+		efi_physical_addr_t target;
+		unsigned long pages;
+
+		if (target_slot >= MD_NUM_SLOTS(md)) {
+			target_slot -= MD_NUM_SLOTS(md);
+			continue;
+		}
+
+		target = round_up(md->phys_addr, align) + target_slot * align;
+		pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+
+		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
+					EFI_LOADER_DATA, pages, &target);
+		if (status == EFI_SUCCESS)
+			*addr = target;
+		break;
+	}
+
+	efi_call_early(free_pool, memory_map);
+
+	return status;
+}

From 4a9c1460b2b904c4a9b6438a14d10c56e0e9ab78 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 11:47:49 +0100
Subject: [PATCH 786/797] efi: stub: use high allocation for converted command
 line

Before we can move the command line processing before the allocation
of the kernel, which is required for detecting the 'nokaslr' option
which controls that allocation, move the converted command line higher
up in memory, to prevent it from interfering with the kernel itself.

Since x86 needs the address to fit in 32 bits, use UINT_MAX as the upper
bound there. Otherwise, use ULONG_MAX (i.e., no limit).

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 48fcb2d0216103d15306caa4814e2381104df6d8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/x86/include/asm/efi.h                     | 2 ++
 drivers/firmware/efi/libstub/efi-stub-helper.c | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0010c78c4998..08b1f2f6ea50 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -25,6 +25,8 @@
 #define EFI32_LOADER_SIGNATURE	"EL32"
 #define EFI64_LOADER_SIGNATURE	"EL64"
 
+#define MAX_CMDLINE_ADDRESS	UINT_MAX
+
 #ifdef CONFIG_X86_32
 
 
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index f07d4a67fa76..29ed2f9b218c 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -649,6 +649,10 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
 	return dst;
 }
 
+#ifndef MAX_CMDLINE_ADDRESS
+#define MAX_CMDLINE_ADDRESS	ULONG_MAX
+#endif
+
 /*
  * Convert the unicode UEFI command line to ASCII to pass to kernel.
  * Size of memory allocated return in *cmd_line_len.
@@ -684,7 +688,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
 
 	options_bytes++;	/* NUL termination */
 
-	status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr);
+	status = efi_high_alloc(sys_table_arg, options_bytes, 0,
+				&cmdline_addr, MAX_CMDLINE_ADDRESS);
 	if (status != EFI_SUCCESS)
 		return NULL;
 

From e009472925ee90986397518ef6796e6f8d12e1da Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 26 Jan 2016 14:48:29 +0100
Subject: [PATCH 787/797] arm64: efi: invoke EFI_RNG_PROTOCOL to supply KASLR
 randomness

Since arm64 does not use a decompressor that supplies an execution
environment where it is feasible to some extent to provide a source of
randomness, the arm64 KASLR kernel depends on the bootloader to supply
some random bits in the /chosen/kaslr-seed DT property upon kernel entry.

On UEFI systems, we can use the EFI_RNG_PROTOCOL, if supplied, to obtain
some random bits. At the same time, use it to randomize the offset of the
kernel Image in physical memory.

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 2b5fe07a78a09a32002642b8a823428ade611f16)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                        |  5 ++
 drivers/firmware/efi/libstub/arm-stub.c   | 40 +++++++----
 drivers/firmware/efi/libstub/arm64-stub.c | 84 ++++++++++++++++-------
 drivers/firmware/efi/libstub/fdt.c        | 14 ++++
 4 files changed, 105 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b311ac23c989..97583a1878db 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -760,6 +760,11 @@ config RANDOMIZE_BASE
 	  It is the bootloader's job to provide entropy, by passing a
 	  random u64 value in /chosen/kaslr-seed at kernel entry.
 
+	  When booting via the UEFI stub, it will invoke the firmware's
+	  EFI_RNG_PROTOCOL implementation (if available) to supply entropy
+	  to the kernel proper. In addition, it will randomise the physical
+	  location of the kernel Image as well.
+
 	  If unsure, say N.
 
 config RANDOMIZE_MODULE_REGION_FULL
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 950c87f5d279..d5aa1d16154f 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -18,6 +18,8 @@
 
 #include "efistub.h"
 
+bool __nokaslr;
+
 static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
 {
 	static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
@@ -207,14 +209,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 		pr_efi_err(sys_table, "Failed to find DRAM base\n");
 		goto fail;
 	}
-	status = handle_kernel_image(sys_table, image_addr, &image_size,
-				     &reserve_addr,
-				     &reserve_size,
-				     dram_base, image);
-	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Failed to relocate kernel\n");
-		goto fail;
-	}
 
 	/*
 	 * Get the command line from EFI, using the LOADED_IMAGE
@@ -224,7 +218,28 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size);
 	if (!cmdline_ptr) {
 		pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n");
-		goto fail_free_image;
+		goto fail;
+	}
+
+	/* check whether 'nokaslr' was passed on the command line */
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		static const u8 default_cmdline[] = CONFIG_CMDLINE;
+		const u8 *str, *cmdline = cmdline_ptr;
+
+		if (IS_ENABLED(CONFIG_CMDLINE_FORCE))
+			cmdline = default_cmdline;
+		str = strstr(cmdline, "nokaslr");
+		if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+			__nokaslr = true;
+	}
+
+	status = handle_kernel_image(sys_table, image_addr, &image_size,
+				     &reserve_addr,
+				     &reserve_size,
+				     dram_base, image);
+	if (status != EFI_SUCCESS) {
+		pr_efi_err(sys_table, "Failed to relocate kernel\n");
+		goto fail_free_cmdline;
 	}
 
 	status = efi_parse_options(cmdline_ptr);
@@ -244,7 +259,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 
 		if (status != EFI_SUCCESS) {
 			pr_efi_err(sys_table, "Failed to load device tree!\n");
-			goto fail_free_cmdline;
+			goto fail_free_image;
 		}
 	}
 
@@ -286,12 +301,11 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	efi_free(sys_table, initrd_size, initrd_addr);
 	efi_free(sys_table, fdt_size, fdt_addr);
 
-fail_free_cmdline:
-	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
-
 fail_free_image:
 	efi_free(sys_table, image_size, *image_addr);
 	efi_free(sys_table, reserve_size, reserve_addr);
+fail_free_cmdline:
+	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
 fail:
 	return EFI_ERROR;
 }
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 78dfbd34b6bf..e0e6b74fef8f 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -13,6 +13,10 @@
 #include <asm/efi.h>
 #include <asm/sections.h>
 
+#include "efistub.h"
+
+extern bool __nokaslr;
+
 efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 					unsigned long *image_addr,
 					unsigned long *image_size,
@@ -23,26 +27,52 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 {
 	efi_status_t status;
 	unsigned long kernel_size, kernel_memsize = 0;
-	unsigned long nr_pages;
 	void *old_image_addr = (void *)*image_addr;
 	unsigned long preferred_offset;
+	u64 phys_seed = 0;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		if (!__nokaslr) {
+			status = efi_get_random_bytes(sys_table_arg,
+						      sizeof(phys_seed),
+						      (u8 *)&phys_seed);
+			if (status == EFI_NOT_FOUND) {
+				pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+			} else if (status != EFI_SUCCESS) {
+				pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n");
+				return status;
+			}
+		} else {
+			pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n");
+		}
+	}
 
 	/*
 	 * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond
 	 * a 2 MB aligned base, which itself may be lower than dram_base, as
 	 * long as the resulting offset equals or exceeds it.
 	 */
-	preferred_offset = round_down(dram_base, SZ_2M) + TEXT_OFFSET;
+	preferred_offset = round_down(dram_base, MIN_KIMG_ALIGN) + TEXT_OFFSET;
 	if (preferred_offset < dram_base)
-		preferred_offset += SZ_2M;
+		preferred_offset += MIN_KIMG_ALIGN;
 
-	/* Relocate the image, if required. */
 	kernel_size = _edata - _text;
-	if (*image_addr != preferred_offset) {
-		kernel_memsize = kernel_size + (_end - _edata);
+	kernel_memsize = kernel_size + (_end - _edata);
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
 		/*
-		 * First, try a straight allocation at the preferred offset.
+		 * If KASLR is enabled, and we have some randomness available,
+		 * locate the kernel at a randomized offset in physical memory.
+		 */
+		*reserve_size = kernel_memsize + TEXT_OFFSET;
+		status = efi_random_alloc(sys_table_arg, *reserve_size,
+					  MIN_KIMG_ALIGN, reserve_addr,
+					  phys_seed);
+
+		*image_addr = *reserve_addr + TEXT_OFFSET;
+	} else {
+		/*
+		 * Else, try a straight allocation at the preferred offset.
 		 * This will work around the issue where, if dram_base == 0x0,
 		 * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
 		 * address of the allocation to be mistaken for a FAIL return
@@ -52,27 +82,31 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 		 * Mustang), we can still place the kernel at the address
 		 * 'dram_base + TEXT_OFFSET'.
 		 */
-		*image_addr = *reserve_addr = preferred_offset;
-		nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
-			   EFI_PAGE_SIZE;
-		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-					EFI_LOADER_DATA, nr_pages,
-					(efi_physical_addr_t *)reserve_addr);
-		if (status != EFI_SUCCESS) {
-			kernel_memsize += TEXT_OFFSET;
-			status = efi_low_alloc(sys_table_arg, kernel_memsize,
-					       SZ_2M, reserve_addr);
+		if (*image_addr == preferred_offset)
+			return EFI_SUCCESS;
 
-			if (status != EFI_SUCCESS) {
-				pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
-				return status;
-			}
-			*image_addr = *reserve_addr + TEXT_OFFSET;
-		}
-		memcpy((void *)*image_addr, old_image_addr, kernel_size);
-		*reserve_size = kernel_memsize;
+		*image_addr = *reserve_addr = preferred_offset;
+		*reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN);
+
+		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
+					EFI_LOADER_DATA,
+					*reserve_size / EFI_PAGE_SIZE,
+					(efi_physical_addr_t *)reserve_addr);
 	}
 
+	if (status != EFI_SUCCESS) {
+		*reserve_size = kernel_memsize + TEXT_OFFSET;
+		status = efi_low_alloc(sys_table_arg, *reserve_size,
+				       MIN_KIMG_ALIGN, reserve_addr);
+
+		if (status != EFI_SUCCESS) {
+			pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
+			*reserve_size = 0;
+			return status;
+		}
+		*image_addr = *reserve_addr + TEXT_OFFSET;
+	}
+	memcpy((void *)*image_addr, old_image_addr, kernel_size);
 
 	return EFI_SUCCESS;
 }
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index b62e2f5dcab3..b1c22cf18f7d 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -147,6 +147,20 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 	if (status)
 		goto fdt_set_fail;
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		efi_status_t efi_status;
+
+		efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64),
+						  (u8 *)&fdt_val64);
+		if (efi_status == EFI_SUCCESS) {
+			status = fdt_setprop(fdt, node, "kaslr-seed",
+					     &fdt_val64, sizeof(fdt_val64));
+			if (status)
+				goto fdt_set_fail;
+		} else if (efi_status != EFI_NOT_FOUND) {
+			return efi_status;
+		}
+	}
 	return EFI_SUCCESS;
 
 fdt_set_fail:

From 885af350336f3e3256999234cd4948210df6c946 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Thu, 11 Feb 2016 13:53:10 -0800
Subject: [PATCH 788/797] arm64: make irq_stack_ptr more robust

Switching between stacks is only valid if we are tracing ourselves while on the
irq_stack, so it is only valid when in current and non-preemptible context;
otherwise irq_stack_ptr is simply set to zero.

Fixes: 132cd887b5c5 ("arm64: Modify stack trace and dump for use with irq_stack")
Acked-by: James Morse <james.morse@arm.com>
Tested-by: James Morse <james.morse@arm.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit a80a0eb70c358f8c7dda4bb62b2278dc6285217b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/stacktrace.c | 13 ++++++-------
 arch/arm64/kernel/traps.c      | 11 ++++++++++-
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 4fad9787ab46..cfd46c227c8c 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,14 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	unsigned long irq_stack_ptr;
 
 	/*
-	 * Use raw_smp_processor_id() to avoid false-positives from
-	 * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
-	 * task stacks, we can be pre-empted in this case, so
-	 * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
-	 * tasks can't ever be on an interrupt stack, so regardless of cpu,
-	 * the checks will always fail.
+	 * Switching between stacks is valid when tracing current and in
+	 * non-preemptible context.
 	 */
-	irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
+	if (tsk == current && !preemptible())
+		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	else
+		irq_stack_ptr = 0;
 
 	low  = frame->sp;
 	/* irq stacks are not THREAD_SIZE aligned */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index cbedd724f48e..c5392081b49b 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -146,9 +146,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
-	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	unsigned long irq_stack_ptr;
 	int skip;
 
+	/*
+	 * Switching between stacks is valid when tracing current and in
+	 * non-preemptible context.
+	 */
+	if (tsk == current && !preemptible())
+		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	else
+		irq_stack_ptr = 0;
+
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
 	if (!tsk)

From 37829fdb8c27a5a506cb535db156c917a6e0061a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 9 Mar 2016 15:22:55 +0000
Subject: [PATCH 789/797] arm64: hugetlb: partial revert of 66b3923a1a0f

Commit 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
introduced support for huge pages using the contiguous bit in the PTE
as opposed to block mappings, which may be slightly unwieldy (512M) in
64k page configurations.

Unfortunately, this support has resulted in some late regressions when
running the libhugetlbfs test suite with 64k pages and CONFIG_DEBUG_VM
as a result of a BUG:

 | readback (2M: 64):	------------[ cut here ]------------
 | kernel BUG at fs/hugetlbfs/inode.c:446!
 | Internal error: Oops - BUG: 0 [#1] SMP
 | Modules linked in:
 | CPU: 7 PID: 1448 Comm: readback Not tainted 4.5.0-rc7 #148
 | Hardware name: linux,dummy-virt (DT)
 | task: fffffe0040964b00 ti: fffffe00c2668000 task.ti: fffffe00c2668000
 | PC is at remove_inode_hugepages+0x44c/0x480
 | LR is at remove_inode_hugepages+0x264/0x480

Rather than revert the entire patch, simply avoid advertising the
contiguous huge page sizes for now while people are actively working on
a fix. This patch can then be reverted once things have been sorted out.

Cc: David Woods <dwoods@ezchip.com>
Reported-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit ff7925848b50050732ac0401e0acf27e8b241d7b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/hugetlbpage.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c3614e..da30529bb1f6 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt)
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
-		hugetlb_add_hstate(CONT_PTE_SHIFT);
-	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
-		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
 		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 		return 0;
@@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt)
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-		hugetlb_add_hstate(CONT_PMD_SHIFT);
-	return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif

From f2971e0e6c42c2b0197e43280ef6a48d8a46097e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 15 Mar 2016 11:22:57 +0000
Subject: [PATCH 790/797] arm64: fix KASLR boot-time I-cache maintenance

Commit f80fb3a3d50843a4 ("arm64: add support for kernel ASLR") missed a
DSB necessary to complete I-cache maintenance in the primary boot path,
and hence stale instructions may still be present in the I-cache and may
be executed until the I-cache maintenance naturally completes.

Since commit 8ec41987436d566f ("arm64: mm: ensure patched kernel text is
fetched from PoU"), all CPUs invalidate their I-caches after their MMU
is enabled. Prior to a CPU's MMU having been enabled, arbitrary lines may
have been fetched from the PoC into I-caches. We never patch text
expected to be executed with the MMU off. Thus, it is unnecessary to
perform broadcast I-cache maintenance in the primary boot path.

This patch reduces the scope of the I-cache maintenance to the local
CPU, and adds the missing DSB with similar scope, matching prior
maintenance in the primary boot path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit b90b4a608ea2401cc491828f7a385edd2e236e37)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/head.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 319f896c6e74..a88a15447c3b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -740,8 +740,9 @@ __enable_mmu:
 
 	msr	sctlr_el1, x19			// re-enable the MMU
 	isb
-	ic	ialluis				// flush instructions fetched
-	isb					// via old mapping
+	ic	iallu				// flush instructions fetched
+	dsb	nsh				// via old mapping
+	isb
 	add	x27, x27, x23			// relocated __mmap_switched
 #endif
 	br	x27

From 2426266ca6318722160645a720bd02bece8400c7 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 22 Mar 2016 10:11:45 +0000
Subject: [PATCH 791/797] arm64: consistently use p?d_set_huge

Commit 324420bf91f60582 ("arm64: add support for ioremap() block
mappings") added new p?d_set_huge functions which do the hard work to
generate and set a correct block entry.

These differ from open-coded huge page creation in the early page table
code by explicitly setting the P?D_TYPE_SECT bits (which are implicitly
retained by mk_sect_prot() for any valid prot), but are otherwise
identical (and cannot fail on arm64).

For simplicity and consistency, make use of these in the initial page
table creation code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c661cb1c537e2364bfdabb298fb934fd77445e98)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ff0f5a46b552..41421c724fb9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -211,8 +211,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
 		      block_mappings_allowed(pgtable_alloc)) {
 			pmd_t old_pmd =*pmd;
-			set_pmd(pmd, __pmd(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pmd_set_huge(pmd, phys, prot);
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -272,8 +271,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		if (use_1G_block(addr, next, phys) &&
 		    block_mappings_allowed(pgtable_alloc)) {
 			pud_t old_pud = *pud;
-			set_pud(pud, __pud(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pud_set_huge(pud, phys, prot);
 
 			/*
 			 * If we have an old value for a pud, it will

From 9ca29910090bc04686fbed05306131093da667f1 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 10 Mar 2016 18:41:16 +0000
Subject: [PATCH 792/797] arm64: kasan: Fix zero shadow mapping overriding
 kernel image shadow

With the 16KB and 64KB page size configurations, SWAPPER_BLOCK_SIZE is
PAGE_SIZE and ARM64_SWAPPER_USES_SECTION_MAPS is 0. Since
kimg_shadow_end is not page aligned (_end shifted by
KASAN_SHADOW_SCALE_SHIFT), the edges of previously mapped kernel image
shadow via vmemmap_populate() may be overridden by subsequent calls to
kasan_populate_zero_shadow(), leading to kernel panics like below:

------------------------------------------------------------------------------
Unable to handle kernel paging request at virtual address fffffc100135068c
pgd = fffffc8009ac0000
[fffffc100135068c] *pgd=00000009ffee0003, *pud=00000009ffee0003, *pmd=00000009ffee0003, *pte=00e0000081a00793
Internal error: Oops: 9600004f [#1] PREEMPT SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.5.0-rc4+ #1984
Hardware name: Juno (DT)
task: fffffe09001a0000 ti: fffffe0900200000 task.ti: fffffe0900200000
PC is at __memset+0x4c/0x200
LR is at kasan_unpoison_shadow+0x34/0x50
pc : [<fffffc800846f1cc>] lr : [<fffffc800821ff54>] pstate: 00000245
sp : fffffe0900203db0
x29: fffffe0900203db0 x28: 0000000000000000
x27: 0000000000000000 x26: 0000000000000000
x25: fffffc80099b69d0 x24: 0000000000000001
x23: 0000000000000000 x22: 0000000000002000
x21: dffffc8000000000 x20: 1fffff9001350a8c
x19: 0000000000002000 x18: 0000000000000008
x17: 0000000000000147 x16: ffffffffffffffff
x15: 79746972100e041d x14: ffffff0000000000
x13: ffff000000000000 x12: 0000000000000000
x11: 0101010101010101 x10: 1fffffc11c000000
x9 : 0000000000000000 x8 : fffffc100135068c
x7 : 0000000000000000 x6 : 000000000000003f
x5 : 0000000000000040 x4 : 0000000000000004
x3 : fffffc100134f651 x2 : 0000000000000400
x1 : 0000000000000000 x0 : fffffc100135068c

Process swapper/0 (pid: 1, stack limit = 0xfffffe0900200020)
Call trace:
[<fffffc800846f1cc>] __memset+0x4c/0x200
[<fffffc8008220044>] __asan_register_globals+0x5c/0xb0
[<fffffc8008a09d34>] _GLOBAL__sub_I_65535_1_sunrpc_cache_lookup+0x1c/0x28
[<fffffc8008f20d28>] kernel_init_freeable+0x104/0x274
[<fffffc80089e1948>] kernel_init+0x10/0xf8
[<fffffc8008093a00>] ret_from_fork+0x10/0x50
------------------------------------------------------------------------------

This patch aligns kimg_shadow_start and kimg_shadow_end to
SWAPPER_BLOCK_SIZE in all configurations.

Fixes: f9040773b7bb ("arm64: move kernel image to base of vmalloc area")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
(cherry picked from commit 2776e0e8ef683a42fe3e9a5facf576b73579700e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 56e19d150c21..206dd95ea292 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -158,15 +158,12 @@ void __init kasan_init(void)
 	 * vmemmap_populate() has populated the shadow region that covers the
 	 * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
 	 * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
-	 * kasan_populate_zero_shadow() from replacing the PMD block mappings
-	 * with PMD table mappings at the edges of the shadow region for the
-	 * kernel image.
+	 * kasan_populate_zero_shadow() from replacing the page table entries
+	 * (PMD or PTE) at the edges of the shadow region for the kernel
+	 * image.
 	 */
-	if (ARM64_SWAPPER_USES_SECTION_MAPS) {
-		kimg_shadow_start = round_down(kimg_shadow_start,
-					       SWAPPER_BLOCK_SIZE);
-		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
-	}
+	kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
+	kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
 
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 				   (void *)mod_shadow_start);

From 3d3fe7cf1fbc97ea285fd1ee3ed55b7527e4d1ae Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 2 Mar 2016 09:47:13 +0100
Subject: [PATCH 793/797] arm64: mm: check at build time that PAGE_OFFSET
 divides the VA space evenly

Commit 8439e62a1561 ("arm64: mm: use bit ops rather than arithmetic in
pa/va translations") changed the boundary check against PAGE_OFFSET from
an arithmetic comparison to a bit test. This means we now silently assume
that PAGE_OFFSET is a power of 2 that divides the kernel virtual address
space into two equal halves. So make that assumption explicit.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6d2aa549de1fc998581d216de3853aa131aa4446)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/init.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 58a6d3f7525c..19ccdb73c680 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -170,6 +170,13 @@ void __init arm64_memblock_init(void)
 {
 	const s64 linear_region_size = -(s64)PAGE_OFFSET;
 
+	/*
+	 * Ensure that the linear region takes up exactly half of the kernel
+	 * virtual address space. This way, we can distinguish a linear address
+	 * from a kernel/module/vmalloc address by testing a single bit.
+	 */
+	BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
+
 	/*
 	 * Select a suitable value for the base of physical memory.
 	 */

From 27fa6e51b8ddfd0224dad957b85fd8097caa5978 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 25 Feb 2016 20:48:53 +0100
Subject: [PATCH 794/797] arm64: lse: deal with clobbered IP registers after
 branch via PLT

The LSE atomics implementation uses runtime patching to patch in calls
to out of line non-LSE atomics implementations on cores that lack hardware
support for LSE. To avoid paying the overhead cost of a function call even
if no call ends up being made, the bl instruction is kept invisible to the
compiler, and the out of line implementations preserve all registers, not
just the ones that they are required to preserve as per the AAPCS64.

However, commit fd045f6cd98e ("arm64: add support for module PLTs") added
support for routing branch instructions via veneers if the branch target
offset exceeds the range of the ordinary relative branch instructions.
Since this deals with jump and call instructions that are exposed to ELF
relocations, the PLT code uses x16 to hold the address of the branch target
when it performs an indirect branch-to-register, something which is
explicitly allowed by the AAPCS64 (and ordinary compiler generated code
does not expect register x16 or x17 to retain their values across a bl
instruction).

Since the lse runtime patched bl instructions don't adhere to the AAPCS64,
they don't deal with this clobbering of registers x16 and x17. So add them
to the clobber list of the asm() statements that perform the call
instructions, and drop x16 and x17 from the list of registers that are
callee saved in the out of line non-LSE implementations.

In addition, since we have given these functions two scratch registers,
they no longer need to stack/unstack temp registers.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: factored clobber list into #define, updated Makefile comment]
Signed-off-by: Will Deacon <will.deacon@arm.com>

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 5be8b70af1ca78cefb8b756d157532360a5fd663)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/atomic_lse.h | 38 ++++++++++++++---------------
 arch/arm64/include/asm/lse.h        |  1 +
 arch/arm64/lib/Makefile             | 13 +++++-----
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 197e06afbf71..39c1d340fec5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v)
 	"	stclr	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_or(int i, atomic_t *v)
@@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v)
 	"	stset	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_xor(int i, atomic_t *v)
@@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v)
 	"	steor	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_add(int i, atomic_t *v)
@@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	"	stadd	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
@@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v)		\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return w0;							\
 }
@@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v)
 	"	stclr	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_sub(int i, atomic_t *v)
@@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	"	stadd	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_SUB_RETURN(name, mb, cl...)				\
@@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)		\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS , ##cl);					\
 									\
 	return w0;							\
 }
@@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
 	"	stclr	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_or(long i, atomic64_t *v)
@@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v)
 	"	stset	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_xor(long i, atomic64_t *v)
@@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v)
 	"	steor	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_add(long i, atomic64_t *v)
@@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v)
 	"	stadd	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)				\
@@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v)	\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 	"	stclr	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_sub(long i, atomic64_t *v)
@@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 	"	stadd	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)				\
@@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	"2:")
 	: [ret] "+&r" (x0), [v] "+Q" (v->counter)
 	:
-	: "x30", "cc", "memory");
+	: __LL_SC_CLOBBERS, "cc", "memory");
 
 	return x0;
 }
@@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 	"	mov	%" #w "[ret], " #w "30")			\
 	: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)		\
 	: [old] "r" (x1), [new] "r" (x2)				\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1,		\
 	  [v] "+Q" (*(unsigned long *)ptr)				\
 	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),		\
 	  [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)		\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 3de42d68611d..23acc00be32d 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -26,6 +26,7 @@ __asm__(".arch_extension	lse");
 
 /* Macro for constructing calls to out-of-line ll/sc atomics */
 #define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CLOBBERS	"x16", "x17", "x30"
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 1a811ecf71da..c86b7909ef31 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -4,15 +4,16 @@ lib-y		:= bitops.o clear_user.o delay.o copy_from_user.o	\
 		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
 		   strchr.o strrchr.o
 
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
 CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
 		   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
 		   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15	\
-		   -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+		   -fcall-saved-x18

From b78c702db9fefface6f68cfade7a1afbadd829ab Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 26 Feb 2016 17:57:14 +0100
Subject: [PATCH 795/797] arm64: mm: treat memstart_addr as a signed quantity

Commit c031a4213c11 ("arm64: kaslr: randomize the linear region")
implements randomization of the linear region, by subtracting a random
multiple of PUD_SIZE from memstart_addr. This causes the virtual mapping
of system RAM to move upwards in the linear region, and at the same time
causes memstart_addr to assume a value which may be negative if the offset
of system RAM in the physical space is smaller than its offset relative to
PAGE_OFFSET in the virtual space.

Since memstart_addr is effectively an offset now, redefine its type as s64
so that expressions involving shifting or division preserve its sign.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 020d044f66874eba058ce8264fc550f3eca67879)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 2 +-
 arch/arm64/mm/init.c            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 5f8667a99e41..12f8a00fb3f1 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -135,7 +135,7 @@
 #include <linux/bitops.h>
 #include <linux/mmdebug.h>
 
-extern phys_addr_t		memstart_addr;
+extern s64			memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 19ccdb73c680..9db46dfb6afb 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -54,7 +54,7 @@
  * executes, which assigns it its actual value. So use a default value
  * that cannot be mistaken for a real physical address.
  */
-phys_addr_t memstart_addr __read_mostly = ~0ULL;
+s64 memstart_addr __read_mostly = -1;
 phys_addr_t arm64_dma_phys_limit __read_mostly;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -188,7 +188,7 @@ void __init arm64_memblock_init(void)
 	 * linear mapping. Take care not to clip the kernel which may be
 	 * high in memory.
 	 */
-	memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)),
+	memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
 			ULLONG_MAX);
 	if (memblock_end_of_DRAM() > linear_region_size)
 		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);

From 200d9e78dba04ae2a5ee4b847f389758db5152dd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 10 Mar 2016 18:30:56 +0000
Subject: [PATCH 796/797] arm64: kasan: Use actual memory node when populating
 the kernel image shadow

With the 16KB or 64KB page configurations, the generic
vmemmap_populate() implementation warns on potential offnode
page_structs via vmemmap_verify() because the arm64 kasan_init() passes
NUMA_NO_NODE instead of the actual node for the kernel image memory.

Fixes: f9040773b7bb ("arm64: move kernel image to base of vmalloc area")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: James Morse <james.morse@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
(cherry picked from commit 2f76969f2eef051bdd63d38b08d78e790440b0ad)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 206dd95ea292..757009daa9ed 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -152,7 +152,8 @@ void __init kasan_init(void)
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
-	vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE);
+	vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
+			 pfn_to_nid(virt_to_pfn(_text)));
 
 	/*
 	 * vmemmap_populate() has populated the shadow region that covers the

From 5dd612ebfad71f5463d89ff92d1bc307cd286b5d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Mar 2016 16:00:46 +0100
Subject: [PATCH 797/797] parisc: Use generic extable search and sort routines

Switch to the generic extable search and sort routines which were introduced
with commit a272858 from Ard Biesheuvel. This saves quite some memory in the
vmlinux binary with the 64bit kernel.

Signed-off-by: Helge Deller <deller@gmx.de>
(cherry picked from commit 0de798584bdedfdad19db21e3c7aec84f252f4f3)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/parisc/Kconfig                | 1 +
 arch/parisc/include/asm/assembly.h | 2 +-
 arch/parisc/include/asm/uaccess.h  | 7 ++++---
 arch/parisc/mm/fault.c             | 9 ++-------
 scripts/sortextable.c              | 1 +
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 729f89163bc3..d2256fa97ea0 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
+	select BUILDTIME_EXTABLE_SORT
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index b3069fd83468..60e6f07b7e32 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -523,7 +523,7 @@
 	 */
 #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr)	\
 	.section __ex_table,"aw"			!	\
-	ASM_ULONG_INSN	fault_addr, except_addr		!	\
+	.word (fault_addr - .), (except_addr - .)	!	\
 	.previous
 
 
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 1960b87c1c8b..6f893d29f1b2 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -60,14 +60,15 @@ static inline long access_ok(int type, const void __user * addr,
  * use a 32bit (unsigned int) address here.
  */
 
+#define ARCH_HAS_RELATIVE_EXTABLE
 struct exception_table_entry {
-	unsigned long insn;	/* address of insn that is allowed to fault. */
-	unsigned long fixup;	/* fixup routine */
+	int insn;	/* relative address of insn that is allowed to fault. */
+	int fixup;	/* relative address of fixup routine */
 };
 
 #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
 	".section __ex_table,\"aw\"\n"			   \
-	ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \
+	".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
 	".previous\n"
 
 /*
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index f9064449908a..16dbe81c97c9 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -140,12 +140,6 @@ int fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *fix;
 
-	/* If we only stored 32bit addresses in the exception table we can drop
-	 * out if we faulted on a 64bit address. */
-	if ((sizeof(regs->iaoq[0]) > sizeof(fix->insn))
-		&& (regs->iaoq[0] >> 32))
-			return 0;
-
 	fix = search_exception_tables(regs->iaoq[0]);
 	if (fix) {
 		struct exception_data *d;
@@ -155,7 +149,8 @@ int fixup_exception(struct pt_regs *regs)
 		d->fault_space = regs->isr;
 		d->fault_addr = regs->ior;
 
-		regs->iaoq[0] = ((fix->fixup) & ~3);
+		regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
+		regs->iaoq[0] &= ~3;
 		/*
 		 * NOTE: In some cases the faulting instruction
 		 * may be in the delay slot of a branch. We
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index 19d83647846c..a2c0d620ca80 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -283,6 +283,7 @@ do_file(char const *const fname)
 	case EM_X86_64:
 	case EM_S390:
 	case EM_AARCH64:
+	case EM_PARISC:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT: